From: Linus Torvalds
Date: Wed, 27 Jul 2016 21:03:52 +0000 (-0700)
Subject: Merge tag 'leds_for_4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewsk...
X-Git-Tag: v4.8-rc1~136
X-Git-Url: http://git.samba.org/samba.git/?p=sfrench%2Fcifs-2.6.git;a=commitdiff_plain;h=607e11ab6654e167b1b0ec132cedc73e220f63c6;hp=5706c01fcfb313cef7aed6a283b43c5452cf8336

Merge tag 'leds_for_4.8' of git://git./linux/kernel/git/j.anaszewski/linux-leds

Pull LED updates from Jacek Anaszewski:
 "New LED class driver:
   - LED driver for TI LP3952 6-Channel Color LED

  LED core improvements:
   - Only descend into leds directory when CONFIG_NEW_LEDS is set
   - Add no-op gpio_led_register_device when LED subsystem is disabled
   - MAINTAINERS: Add file patterns for led device tree bindings

  LED Trigger core improvements:
   - return error if invalid trigger name is provided via sysfs

  LED class drivers improvements:
   - is31fl32xx: define complete i2c_device_id table
   - is31fl32xx: fix typo in id and match table names
   - leds-gpio: Set of_node for created LED devices
   - pca9532: Add device tree support

  Conversion of IDE trigger to common disk trigger:
   - leds: convert IDE trigger to common disk trigger
   - leds: documentation: 'ide-disk' to 'disk-activity'
   - unicore32: use the new LED disk activity trigger
   - parisc: use the new LED disk activity trigger
   - mips: use the new LED disk activity trigger
   - arm: use the new LED disk activity trigger
   - powerpc: use the new LED disk activity trigger"

* tag 'leds_for_4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds:
  leds: is31fl32xx: define complete i2c_device_id table
  leds: is31fl32xx: fix typo in id and match table names
  leds: LED driver for TI LP3952 6-Channel Color LED
  leds: leds-gpio: Set of_node for created LED devices
  leds: triggers: return error if invalid trigger name is provided via sysfs
  leds: Only descend into leds directory when CONFIG_NEW_LEDS is set
  leds: Add no-op gpio_led_register_device when LED subsystem is disabled
  unicore32: use the new LED disk activity trigger
  parisc: use the new LED disk activity trigger
  mips: use the new LED disk activity trigger
  arm: use the new LED disk activity trigger
  powerpc: use the new LED disk activity trigger
  leds: documentation: 'ide-disk' to 'disk-activity'
  leds: convert IDE trigger to common disk trigger
  leds: pca9532: Add device tree support
  MAINTAINERS: Add file patterns for led device tree bindings
---

diff --git a/.mailmap b/.mailmap
index 779a9caadc7f..d2acafb09e60 100644
--- a/.mailmap
+++ b/.mailmap
@@ -11,6 +11,7 @@ Aaron Durbin
 Adam Oldham
 Adam Radford
 Adrian Bunk
+Adriana Reus
 Alan Cox
 Alan Cox
 Aleksey Gorelov
@@ -21,6 +22,7 @@ Andrey Ryabinin
 Andrew Morton
 Andrew Vasquez
 Andy Adamson
+Antoine Tenart
 Antonio Ospite
 Archit Taneja
 Arnaud Patard
@@ -30,6 +32,9 @@ Axel Lin
 Ben Gardner
 Ben M Cahill
 Björn Steinbrink
+Boris Brezillon
+Boris Brezillon
+Boris Brezillon
 Brian Avery
 Brian King
 Christoph Hellwig
@@ -90,6 +95,8 @@ Linas Vepstas
 Mark Brown
 Matthieu CASTET
 Mauro Carvalho Chehab
+Matt Ranostay Matthew Ranostay
+Matt Ranostay
 Mayuresh Janorkar
 Michael Buesch
 Michel Dänzer
diff --git a/Documentation/.gitignore b/Documentation/.gitignore
new file mode 100644
index 000000000000..e74fec8693b2
--- /dev/null
+++ b/Documentation/.gitignore
@@ -0,0 +1,2 @@
+output
+*.pyc
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index cd077ca0e1b8..cb9a6c6fa83b 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -255,10 +255,10 @@ kbuild/
 	- directory with info about the kernel build process.
 kdump/
 	- directory with mini HowTo on getting the crash dump code to work.
-kernel-doc-nano-HOWTO.txt
-	- mini HowTo on generation and location of kernel documentation files.
 kernel-docs.txt
 	- listing of various WWW + books that document kernel internals.
+kernel-documentation.rst
+	- how to write and format reStructuredText kernel documentation
 kernel-parameters.txt
 	- summary listing of command line / boot prompt args for the kernel.
 kernel-per-CPU-kthreads.txt
diff --git a/Documentation/ABI/testing/configfs-acpi b/Documentation/ABI/testing/configfs-acpi
new file mode 100644
index 000000000000..4ab4e99aa863
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-acpi
@@ -0,0 +1,36 @@
+What: /config/acpi
+Date: July 2016
+KernelVersion: 4.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+	This represents the ACPI subsystem entry point directory. It
+	contains sub-groups corresponding to ACPI configurable options.
+
+What: /config/acpi/table
+Date: July 2016
+KernelVersion: 4.8
+Description:
+
+	This group contains the configuration for user defined ACPI
+	tables. The attributes of a user defined table are:
+
+	aml - a binary attribute that the user can use to
+	      fill in the ACPI aml definitions. Once the aml
+	      data is written to this file and the file is
+	      closed the table will be loaded and ACPI devices
+	      will be enumerated. To check if the operation is
+	      successful the user must check the error code
+	      for close(). If the operation is successful,
+	      subsequent writes to this attribute will fail.
+
+	The rest of the attributes are read-only and are valid only
+	after the table has been loaded by filling the aml entry:
+
+	signature - ASCII table signature
+	length - length of table in bytes, including the header
+	revision - ACPI Specification minor version number
+	oem_id - ASCII OEM identification
+	oem_table_id - ASCII OEM table identification
+	oem_revision - OEM revision number
+	asl_compiler_id - ASCII ASL compiler vendor ID
+	asl_compiler_revision - ASL compiler version
diff --git a/Documentation/ABI/testing/configfs-iio b/Documentation/ABI/testing/configfs-iio
index 2483756fccf5..aebda53ec0f7 100644
--- a/Documentation/ABI/testing/configfs-iio
+++ b/Documentation/ABI/testing/configfs-iio
@@ -19,3 +19,16 @@ KernelVersion: 4.4
 Description:
 	High resolution timers directory. Creating a directory here
 	will result in creating a hrtimer trigger in the IIO subsystem.
+
+What: /config/iio/devices
+Date: April 2016
+KernelVersion: 4.7
+Description:
+	Industrial IO software devices directory.
+
+What: /config/iio/devices/dummy
+Date: April 2016
+KernelVersion: 4.7
+Description:
+	Dummy IIO devices directory. Creating a directory here will result
+	in creating a dummy IIO device in the IIO subsystem.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index df44998e7506..fee35c00cc4e 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -32,6 +32,13 @@ Description:
 	Description of the physical chip / device for device X.
 	Typically a part number.
+What: /sys/bus/iio/devices/iio:deviceX/timestamp_clock
+KernelVersion: 4.5
+Contact: linux-iio@vger.kernel.org
+Description:
+	String identifying current posix clock used to timestamp
+	buffered samples and events for device X.
+ What: /sys/bus/iio/devices/iio:deviceX/sampling_frequency What: /sys/bus/iio/devices/iio:deviceX/buffer/sampling_frequency What: /sys/bus/iio/devices/triggerX/sampling_frequency @@ -1565,3 +1572,10 @@ Description: * X is in the plane of the propellers, perpendicular to Y axis, and positive towards the starboard side of the UAV ; * Z is perpendicular to propellers plane and positive upwards. + +What: /sys/bus/iio/devices/iio:deviceX/in_electricalconductivity_raw +KernelVersion: 4.8 +Contact: linux-iio@vger.kernel.org +Description: + Raw (unscaled no offset etc.) electric conductivity reading that + can be processed to siemens per meter. diff --git a/Documentation/ABI/testing/sysfs-bus-iio-health-afe440x b/Documentation/ABI/testing/sysfs-bus-iio-health-afe440x index 3740f253d406..6adba9058b22 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-health-afe440x +++ b/Documentation/ABI/testing/sysfs-bus-iio-health-afe440x @@ -1,54 +1,41 @@ -What: /sys/bus/iio/devices/iio:deviceX/tia_resistanceY - /sys/bus/iio/devices/iio:deviceX/tia_capacitanceY -Date: December 2015 -KernelVersion: -Contact: Andrew F. Davis -Description: - Get and set the resistance and the capacitance settings for the - Transimpedance Amplifier. Y is 1 for Rf1 and Cf1, Y is 2 for - Rf2 and Cf2 values. - -What: /sys/bus/iio/devices/iio:deviceX/tia_separate_en -Date: December 2015 -KernelVersion: -Contact: Andrew F. Davis -Description: - Enable or disable separate settings for the TransImpedance - Amplifier above, when disabled both values are set by the - first channel. - -What: /sys/bus/iio/devices/iio:deviceX/in_intensity_ledY_raw - /sys/bus/iio/devices/iio:deviceX/in_intensity_ledY_ambient_raw -Date: December 2015 +What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_raw +Date: May 2016 KernelVersion: Contact: Andrew F. Davis Description: Get measured values from the ADC for these stages. Y is the - specific LED number. The values are expressed in 24-bit twos - complement. + specific stage number corresponding to datasheet stage names + as follows: + 1 -> LED2 + 2 -> ALED2/LED3 + 3 -> LED1 + 4 -> ALED1/LED4 + Note that channels 5 and 6 represent LED2-ALED2 and LED1-ALED1 + respectively which simply helper channels containing the + calculated difference in the value of stage 1 - 2 and 3 - 4. + The values are expressed in 24-bit twos complement. -What: /sys/bus/iio/devices/iio:deviceX/in_intensity_ledY-ledY_ambient_raw -Date: December 2015 +What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_offset +Date: May 2016 KernelVersion: Contact: Andrew F. Davis Description: - Get differential values from the ADC for these stages. Y is the - specific LED number. The values are expressed in 24-bit twos - complement for the specified LEDs. + Get and set the offset cancellation DAC setting for these + stages. The values are expressed in 5-bit sign-magnitude. -What: /sys/bus/iio/devices/iio:deviceX/out_current_ledY_offset - /sys/bus/iio/devices/iio:deviceX/out_current_ledY_ambient_offset -Date: December 2015 +What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_resistance +What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_capacitance +Date: May 2016 KernelVersion: Contact: Andrew F. Davis Description: - Get and set the offset cancellation DAC setting for these - stages. The values are expressed in 5-bit sign-magnitude. + Get and set the resistance and the capacitance settings for the + Transimpedance Amplifier during the associated stage. 
-What: /sys/bus/iio/devices/iio:deviceX/out_current_ledY_raw -Date: December 2015 +What: /sys/bus/iio/devices/iio:deviceX/out_currentY_raw +Date: May 2016 KernelVersion: Contact: Andrew F. Davis Description: - Get and set the LED current for the specified LED. Y is the - specific LED number. + Get and set the LED current for the specified LED active during + this stage. Y is the specific stage number. diff --git a/Documentation/ABI/testing/sysfs-class-net-batman-adv b/Documentation/ABI/testing/sysfs-class-net-batman-adv index 518f6a1dbc0c..898106849e27 100644 --- a/Documentation/ABI/testing/sysfs-class-net-batman-adv +++ b/Documentation/ABI/testing/sysfs-class-net-batman-adv @@ -1,19 +1,10 @@ -What: /sys/class/net//batman-adv/throughput_override -Date: Feb 2014 -Contact: Antonio Quartulli -description: - Defines the throughput value to be used by B.A.T.M.A.N. V - when estimating the link throughput using this interface. - If the value is set to 0 then batman-adv will try to - estimate the throughput by itself. - What: /sys/class/net//batman-adv/elp_interval Date: Feb 2014 Contact: Linus Lüssing Description: Defines the interval in milliseconds in which batman - sends its probing packets for link quality measurements. + emits probing packets for neighbor sensing (ELP). What: /sys/class/net//batman-adv/iface_status Date: May 2010 @@ -28,3 +19,12 @@ Description: The /sys/class/net//batman-adv/mesh_iface file displays the batman mesh interface this currently is associated with. + +What: /sys/class/net//batman-adv/throughput_override +Date: Feb 2014 +Contact: Antonio Quartulli +description: + Defines the throughput value to be used by B.A.T.M.A.N. V + when estimating the link throughput using this interface. + If the value is set to 0 then batman-adv will try to + estimate the throughput by itself. diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 16501334b99f..498741737055 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -340,3 +340,13 @@ Description: POWERNV CPUFreq driver's frequency throttle stats directory and 'policyX/throttle_stats' directory and all the attributes are same as the /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats directory and attributes which give the frequency throttle information of the chip. + +What: /sys/devices/system/cpu/cpuX/regs/ + /sys/devices/system/cpu/cpuX/regs/identification/ + /sys/devices/system/cpu/cpuX/regs/identification/midr_el1 + /sys/devices/system/cpu/cpuX/regs/identification/revidr_el1 +Date: June 2016 +Contact: Linux ARM Kernel Mailing list +Description: AArch64 CPU registers + 'identification' directory exposes the CPU ID registers for + identifying model and revision of the CPU. diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index 9a70ddd16584..a096836723ca 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle @@ -458,7 +458,7 @@ of the function, telling people what it does, and possibly WHY it does it. When commenting the kernel API functions, please use the kernel-doc format. -See the files Documentation/kernel-doc-nano-HOWTO.txt and scripts/kernel-doc +See the files Documentation/kernel-documentation.rst and scripts/kernel-doc for details. Linux style for comments is the C89 "/* ... */" style. 
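As a quick illustration of the kernel-doc format referred to above, a comment
block generally takes this shape (the function and its parameters here are
invented for the example):

    /**
     * example_set_brightness() - set the brightness of an LED
     * @led: the LED device to update
     * @value: new brightness value; 0 turns the LED off
     *
     * A longer description of the behaviour can follow here, spanning
     * as many lines as needed.
     *
     * Return: 0 on success or a negative errno value on failure.
     */
    int example_set_brightness(struct led_classdev *led, int value);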
diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 5f7c55999c77..800fe7a9024c 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -136,6 +136,7 @@ !Finclude/net/cfg80211.h cfg80211_ibss_joined !Finclude/net/cfg80211.h cfg80211_connect_result !Finclude/net/cfg80211.h cfg80211_connect_bss +!Finclude/net/cfg80211.h cfg80211_connect_timeout !Finclude/net/cfg80211.h cfg80211_roamed !Finclude/net/cfg80211.h cfg80211_disconnected !Finclude/net/cfg80211.h cfg80211_ready_on_channel diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index d70f9b68174e..01bab5014a4a 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -6,6 +6,8 @@ # To add a new book the only step required is to add the book to the # list of DOCBOOKS. +ifeq ($(IGNORE_DOCBOOKS),) + DOCBOOKS := z8530book.xml device-drivers.xml \ kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ writing_usb_driver.xml networking.xml \ @@ -33,10 +35,6 @@ PDF_METHOD = $(prefer-db2x) PS_METHOD = $(prefer-db2x) -### -# The targets that may be used. -PHONY += xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs cleandocs - targets += $(DOCBOOKS) BOOKS := $(addprefix $(obj)/,$(DOCBOOKS)) xmldocs: $(BOOKS) @@ -63,6 +61,9 @@ installmandocs: mandocs sort -k 2 -k 1 | uniq -f 1 | sed -e 's: :/:' | \ xargs install -m 644 -t /usr/local/man/man9/ +# no-op for the DocBook toolchain +epubdocs: + ### #External programs used KERNELDOCXMLREF = $(srctree)/scripts/kernel-doc-xml-ref @@ -216,10 +217,24 @@ silent_gen_xml = : -e "s/>/\\>/g"; \ echo "") > $@ +else + +# Needed, due to cleanmediadocs +include Documentation/DocBook/media/Makefile + +htmldocs: +pdfdocs: +psdocs: +xmldocs: +installmandocs: + +endif # IGNORE_DOCBOOKS + + ### # Help targets as used by the top-level makefile dochelp: - @echo ' Linux kernel internal documentation in different formats:' + @echo ' Linux kernel internal documentation in different formats (DocBook):' @echo ' htmldocs - HTML' @echo ' pdfdocs - PDF' @echo ' psdocs - Postscript' @@ -228,8 +243,11 @@ dochelp: @echo ' installmandocs - install man pages generated by mandocs' @echo ' cleandocs - clean all generated DocBook files' @echo - @echo 'make DOCBOOKS="s1.xml s2.xml" [target] Generate only docs s1.xml s2.xml' + @echo ' make DOCBOOKS="s1.xml s2.xml" [target] Generate only docs s1.xml s2.xml' @echo ' valid values for DOCBOOKS are: $(DOCBOOKS)' + @echo + @echo " make IGNORE_DOCBOOKS=1 [target] Don't generate docs from Docbook" + @echo ' This is useful to generate only the ReST docs (Sphinx)' ### diff --git a/Documentation/DocBook/crypto-API.tmpl b/Documentation/DocBook/crypto-API.tmpl index d55dc5a39bad..fb2a1526f6ec 100644 --- a/Documentation/DocBook/crypto-API.tmpl +++ b/Documentation/DocBook/crypto-API.tmpl @@ -440,8 +440,8 @@ The type flag specifies the type of the cipher algorithm. The caller usually provides a 0 when the caller wants the default handling. 
Otherwise, the caller may provide the - following selections which match the the aforementioned - cipher types: + following selections which match the aforementioned cipher + types: diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index 8c68768ebee5..58af32b01b90 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -300,6 +300,9 @@ X!Isound/sound_firmware.c !Iinclude/media/media-devnode.h !Iinclude/media/media-entity.h + Consumer Electronics Control devices +!Iinclude/media/cec-edid.h + diff --git a/Documentation/DocBook/iio.tmpl b/Documentation/DocBook/iio.tmpl index f525bf56d1dd..e2ab6a1f223e 100644 --- a/Documentation/DocBook/iio.tmpl +++ b/Documentation/DocBook/iio.tmpl @@ -594,7 +594,7 @@ irqreturn_t sensor_iio_pollfunc(int irq, void *p) { - pf->timestamp = iio_get_time_ns(); + pf->timestamp = iio_get_time_ns((struct indio_dev *)p); return IRQ_WAKE_THREAD; } diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile index 2840ff483d5a..fdc138624800 100644 --- a/Documentation/DocBook/media/Makefile +++ b/Documentation/DocBook/media/Makefile @@ -64,6 +64,7 @@ IOCTLS = \ $(shell perl -ne 'print "$$1 " if /\#define\s+([A-Z][^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/net.h) \ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/video.h) \ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/media.h) \ + $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/linux/cec.h) \ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/v4l2-subdev.h) \ DEFINES = \ @@ -100,6 +101,7 @@ STRUCTS = \ $(shell perl -ne 'print "$$1 " if (/^struct\s+([^\s]+)\s+/ && !/_old/)' $(srctree)/include/uapi/linux/dvb/net.h) \ $(shell perl -ne 'print "$$1 " if (/^struct\s+([^\s]+)\s+/)' $(srctree)/include/uapi/linux/dvb/video.h) \ $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/media.h) \ + $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/linux/cec.h) \ $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/v4l2-subdev.h) \ $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/v4l2-mediabus.h) diff --git a/Documentation/DocBook/media/v4l/biblio.xml b/Documentation/DocBook/media/v4l/biblio.xml index 9beb30f0071b..87f1d24958aa 100644 --- a/Documentation/DocBook/media/v4l/biblio.xml +++ b/Documentation/DocBook/media/v4l/biblio.xml @@ -342,6 +342,16 @@ in the frequency range from 87,5 to 108,0 MHz Specification Version 1.4a + + HDMI2 + + HDMI Licensing LLC +(http://www.hdmi.org) + + High-Definition Multimedia Interface + Specification Version 2.0 + + DP diff --git a/Documentation/DocBook/media/v4l/cec-api.xml b/Documentation/DocBook/media/v4l/cec-api.xml new file mode 100644 index 000000000000..7062c1fa4904 --- /dev/null +++ b/Documentation/DocBook/media/v4l/cec-api.xml @@ -0,0 +1,75 @@ + + + + Hans + Verkuil +
hans.verkuil@cisco.com
+ Initial version. +
+
+ + 2016 + Hans Verkuil + + + + + + 1.0.0 + 2016-03-17 + hv + Initial revision + + +
+ +CEC API + + + CEC: Consumer Electronics Control + +
+ Introduction + + Note: this documents the proposed CEC API. This API is not yet finalized and + is currently only available as a staging kernel module. + + HDMI connectors provide a single pin for use by the Consumer Electronics + Control protocol. This protocol allows different devices connected by an HDMI cable + to communicate. The protocol for CEC version 1.4 is defined in supplements 1 (CEC) + and 2 (HEAC or HDMI Ethernet and Audio Return Channel) of the HDMI 1.4a + () specification and the extensions added to CEC version 2.0 + are defined in chapter 11 of the HDMI 2.0 () specification. + + + The bitrate is very slow (effectively no more than 36 bytes per second) and + is based on the ancient AV.link protocol used in old SCART connectors. The protocol + closely resembles a crazy Rube Goldberg contraption and is an unholy mix of low and + high level messages. Some messages, especially those part of the HEAC protocol layered + on top of CEC, need to be handled by the kernel, others can be handled either by the + kernel or by userspace. + + In addition, CEC can be implemented in HDMI receivers, transmitters and in USB + devices that have an HDMI input and an HDMI output and that control just the CEC pin. + + Drivers that support CEC will create a CEC device node (/dev/cecX) + to give userspace access to the CEC adapter. The &CEC-ADAP-G-CAPS; ioctl will tell userspace + what it is allowed to do. +
+
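A minimal sketch of how an application might open such a device node follows;
the node name /dev/cec0 is only an example and error handling is reduced to a
perror() call:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* /dev/cec0 is an example; the number depends on the system */
            int fd = open("/dev/cec0", O_RDWR);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* ... issue the CEC ioctls described below ... */
            close(fd);
            return 0;
    }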
+ + + Function Reference + + &sub-cec-func-open; + &sub-cec-func-close; + &sub-cec-func-ioctl; + &sub-cec-func-poll; + + &sub-cec-ioc-adap-g-caps; + &sub-cec-ioc-adap-g-log-addrs; + &sub-cec-ioc-adap-g-phys-addr; + &sub-cec-ioc-dqevent; + &sub-cec-ioc-g-mode; + &sub-cec-ioc-receive; + diff --git a/Documentation/DocBook/media/v4l/cec-func-close.xml b/Documentation/DocBook/media/v4l/cec-func-close.xml new file mode 100644 index 000000000000..0812c8cd9634 --- /dev/null +++ b/Documentation/DocBook/media/v4l/cec-func-close.xml @@ -0,0 +1,64 @@ + + + cec close() + &manvol; + + + + cec-close + Close a cec device + + + + + #include <unistd.h> + + int close + int fd + + + + + + Arguments + + + + fd + + &fd; + + + + + + + Description + + + Note: this documents the proposed CEC API. This API is not yet finalized and + is currently only available as a staging kernel module. + + + Closes the cec device. Resources associated with the file descriptor + are freed. The device configuration remain unchanged. + + + + Return Value + + close returns 0 on success. On error, -1 is + returned, and errno is set appropriately. Possible error + codes are: + + + + EBADF + + fd is not a valid open file descriptor. + + + + + + diff --git a/Documentation/DocBook/media/v4l/cec-func-ioctl.xml b/Documentation/DocBook/media/v4l/cec-func-ioctl.xml new file mode 100644 index 000000000000..f92817a2dc80 --- /dev/null +++ b/Documentation/DocBook/media/v4l/cec-func-ioctl.xml @@ -0,0 +1,78 @@ + + + cec ioctl() + &manvol; + + + + cec-ioctl + Control a cec device + + + + + #include <sys/ioctl.h> + + int ioctl + int fd + int request + void *argp + + + + + + Arguments + + + + fd + + &fd; + + + + request + + CEC ioctl request code as defined in the cec.h header file, + for example CEC_ADAP_G_CAPS. + + + + argp + + Pointer to a request-specific structure. + + + + + + + Description + + Note: this documents the proposed CEC API. This API is not yet finalized and + is currently only available as a staging kernel module. + + + The ioctl() function manipulates cec device + parameters. The argument fd must be an open file + descriptor. + The ioctl request code specifies the cec + function to be called. It has encoded in it whether the argument is an + input, output or read/write parameter, and the size of the argument + argp in bytes. + Macros and structures definitions specifying cec ioctl requests and + their parameters are located in the cec.h header file. All cec ioctl + requests, their respective function and parameters are specified in + . + + + + &return-value; + + Request-specific error codes are listed in the + individual requests descriptions. + When an ioctl that takes an output or read/write parameter fails, + the parameter remains unmodified. + + diff --git a/Documentation/DocBook/media/v4l/cec-func-open.xml b/Documentation/DocBook/media/v4l/cec-func-open.xml new file mode 100644 index 000000000000..2edc5555b81a --- /dev/null +++ b/Documentation/DocBook/media/v4l/cec-func-open.xml @@ -0,0 +1,104 @@ + + + cec open() + &manvol; + + + + cec-open + Open a cec device + + + + + #include <fcntl.h> + + int open + const char *device_name + int flags + + + + + + Arguments + + + + device_name + + Device to be opened. + + + + flags + + Open flags. Access mode must be O_RDWR. + + When the O_NONBLOCK flag is +given, the &CEC-RECEIVE; ioctl will return &EAGAIN; when no message is +available, and the &CEC-TRANSMIT;, &CEC-ADAP-S-PHYS-ADDR; and +&CEC-ADAP-S-LOG-ADDRS; ioctls all act in non-blocking mode. + Other flags have no effect. 
+ + + + + + Description + + Note: this documents the proposed CEC API. This API is not yet finalized and + is currently only available as a staging kernel module. + + + To open a cec device applications call open() + with the desired device name. The function has no side effects; the device + configuration remain unchanged. + When the device is opened in read-only mode, attempts to modify its + configuration will result in an error, and errno will be + set to EBADF. + + + Return Value + + open returns the new file descriptor on success. + On error, -1 is returned, and errno is set appropriately. + Possible error codes include: + + + + EACCES + + The requested access to the file is not allowed. + + + + EMFILE + + The process already has the maximum number of files open. + + + + + ENFILE + + The system limit on the total number of open files has been + reached. + + + + ENOMEM + + Insufficient kernel memory was available. + + + + ENXIO + + No device corresponding to this device special file exists. + + + + + + diff --git a/Documentation/DocBook/media/v4l/cec-func-poll.xml b/Documentation/DocBook/media/v4l/cec-func-poll.xml new file mode 100644 index 000000000000..1bddbde0142d --- /dev/null +++ b/Documentation/DocBook/media/v4l/cec-func-poll.xml @@ -0,0 +1,94 @@ + + + cec poll() + &manvol; + + + + cec-poll + Wait for some event on a file descriptor + + + + + #include <sys/poll.h> + + int poll + struct pollfd *ufds + unsigned int nfds + int timeout + + + + + + Description + + + Note: this documents the proposed CEC API. This API is not yet finalized and + is currently only available as a staging kernel module. + + + With the poll() function applications +can wait for CEC events. + + On success poll() returns the number of +file descriptors that have been selected (that is, file descriptors +for which the revents field of the +respective pollfd structure is non-zero). +CEC devices set the POLLIN and +POLLRDNORM flags in the +revents field if there are messages in the +receive queue. If the transmit queue has room for new messages, the +POLLOUT and POLLWRNORM +flags are set. If there are events in the event queue, then the +POLLPRI flag is set. +When the function timed out it returns a value of zero, on +failure it returns -1 and the +errno variable is set appropriately. + + + For more details see the +poll() manual page. + + + + Return Value + + On success, poll() returns the number +structures which have non-zero revents +fields, or zero if the call timed out. On error +-1 is returned, and the +errno variable is set appropriately: + + + + EBADF + + One or more of the ufds members +specify an invalid file descriptor. + + + + EFAULT + + ufds references an inaccessible +memory area. + + + + EINTR + + The call was interrupted by a signal. + + + + EINVAL + + The nfds argument is greater +than OPEN_MAX. + + + + + diff --git a/Documentation/DocBook/media/v4l/cec-ioc-adap-g-caps.xml b/Documentation/DocBook/media/v4l/cec-ioc-adap-g-caps.xml new file mode 100644 index 000000000000..3523ef2259b1 --- /dev/null +++ b/Documentation/DocBook/media/v4l/cec-ioc-adap-g-caps.xml @@ -0,0 +1,151 @@ + + + ioctl CEC_ADAP_G_CAPS + &manvol; + + + + CEC_ADAP_G_CAPS + Query device capabilities + + + + + + int ioctl + int fd + int request + struct cec_caps *argp + + + + + + Arguments + + + + fd + + File descriptor returned by + open(). + + + + request + + CEC_ADAP_G_CAPS + + + + argp + + + + + + + + + Description + + + Note: this documents the proposed CEC API. 
This API is not yet finalized and + is currently only available as a staging kernel module. + + + All cec devices must support the CEC_ADAP_G_CAPS + ioctl. To query device information, applications call the ioctl with a + pointer to a &cec-caps;. The driver fills the structure and returns + the information to the application. + The ioctl never fails. + + + struct <structname>cec_caps</structname> + + &cs-str; + + + char + driver[32] + The name of the cec adapter driver. + + + char + name[32] + The name of this CEC adapter. The combination driver + and name must be unique. + + + __u32 + capabilities + The capabilities of the CEC adapter, see . + + + __u32 + version + CEC Framework API version, formatted with the + KERNEL_VERSION() macro. + + + +
+ + + CEC Capabilities Flags + + &cs-def; + + + CEC_CAP_PHYS_ADDR + 0x00000001 + Userspace has to configure the physical address by + calling &CEC-ADAP-S-PHYS-ADDR;. If this capability isn't set, + then setting the physical address is handled by the kernel + whenever the EDID is set (for an HDMI receiver) or read (for + an HDMI transmitter). + + + CEC_CAP_LOG_ADDRS + 0x00000002 + Userspace has to configure the logical addresses by + calling &CEC-ADAP-S-LOG-ADDRS;. If this capability isn't set, + then the kernel will have configured this. + + + CEC_CAP_TRANSMIT + 0x00000004 + Userspace can transmit CEC messages by calling &CEC-TRANSMIT;. This + implies that userspace can be a follower as well, since being able to + transmit messages is a prerequisite of becoming a follower. If this + capability isn't set, then the kernel will handle all CEC transmits + and process all CEC messages it receives. + + + + CEC_CAP_PASSTHROUGH + 0x00000008 + Userspace can use the passthrough mode by + calling &CEC-S-MODE;. + + + CEC_CAP_RC + 0x00000010 + This adapter supports the remote control protocol. + + + CEC_CAP_MONITOR_ALL + 0x00000020 + The CEC hardware can monitor all messages, not just directed and + broadcast messages. + + + +
+
+ + + &return-value; + +
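A sketch of querying the capabilities from userspace, assuming the staging
cec.h header is available and fd is an open /dev/cecX file descriptor:

    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/cec.h>

    static void show_caps(int fd)
    {
            struct cec_caps caps;

            memset(&caps, 0, sizeof(caps));
            /* per the description above, this ioctl never fails */
            ioctl(fd, CEC_ADAP_G_CAPS, &caps);
            printf("driver %s, adapter %s\n", caps.driver, caps.name);
            if (caps.capabilities & CEC_CAP_TRANSMIT)
                    printf("can transmit CEC messages\n");
    }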
diff --git a/Documentation/DocBook/media/v4l/cec-ioc-adap-g-log-addrs.xml b/Documentation/DocBook/media/v4l/cec-ioc-adap-g-log-addrs.xml new file mode 100644 index 000000000000..302b8294f7fc --- /dev/null +++ b/Documentation/DocBook/media/v4l/cec-ioc-adap-g-log-addrs.xml @@ -0,0 +1,329 @@ + + + ioctl CEC_ADAP_G_LOG_ADDRS, CEC_ADAP_S_LOG_ADDRS + &manvol; + + + + CEC_ADAP_G_LOG_ADDRS + CEC_ADAP_S_LOG_ADDRS + Get or set the logical addresses + + + + + + int ioctl + int fd + int request + struct cec_log_addrs *argp + + + + + + Arguments + + + + fd + + File descriptor returned by + open(). + + + + request + + CEC_ADAP_G_LOG_ADDRS, CEC_ADAP_S_LOG_ADDRS + + + + argp + + + + + + + + + Description + + + Note: this documents the proposed CEC API. This API is not yet finalized and + is currently only available as a staging kernel module. + + + To query the current CEC logical addresses, applications call the +CEC_ADAP_G_LOG_ADDRS ioctl with a pointer to a +cec_log_addrs structure where the drivers stores the +logical addresses. + + To set new logical addresses, applications fill in struct cec_log_addrs +and call the CEC_ADAP_S_LOG_ADDRS ioctl with a pointer to this struct. +The CEC_ADAP_S_LOG_ADDRS ioctl is only available if +CEC_CAP_LOG_ADDRS is set (&ENOTTY; is returned otherwise). This ioctl will block until all +requested logical addresses have been claimed. CEC_ADAP_S_LOG_ADDRS +can only be called by a file handle in initiator mode (see &CEC-S-MODE;). + + + struct <structname>cec_log_addrs</structname> + + &cs-str; + + + __u8 + log_addr[CEC_MAX_LOG_ADDRS] + The actual logical addresses that were claimed. This is set by the + driver. If no logical address could be claimed, then it is set to + CEC_LOG_ADDR_INVALID. If this adapter is Unregistered, + then log_addr[0] is set to 0xf and all others to + CEC_LOG_ADDR_INVALID. + + + __u16 + log_addr_mask + The bitmask of all logical addresses this adapter has claimed. + If this adapter is Unregistered then log_addr_mask + sets bit 15 and clears all other bits. If this adapter is not configured at all, then + log_addr_mask is set to 0. Set by the driver. + + + __u8 + cec_version + The CEC version that this adapter shall use. See + . + Used to implement the CEC_MSG_CEC_VERSION and + CEC_MSG_REPORT_FEATURES messages. Note that + CEC_OP_CEC_VERSION_1_3A is not allowed + by the CEC framework. + + + + __u8 + num_log_addrs + Number of logical addresses to set up. Must be ≤ + available_log_addrs as returned by + &CEC-ADAP-G-CAPS;. All arrays in this structure are only filled up to + index available_log_addrs-1. The remaining + array elements will be ignored. Note that the CEC 2.0 standard allows + for a maximum of 2 logical addresses, although some hardware has support + for more. CEC_MAX_LOG_ADDRS is 4. The driver will + return the actual number of logical addresses it could claim, which may + be less than what was requested. If this field is set to 0, then the + CEC adapter shall clear all claimed logical addresses and all other + fields will be ignored. + + + __u32 + vendor_id + The vendor ID is a 24-bit number that identifies the specific + vendor or entity. Based on this ID vendor specific commands may be + defined. If you do not want a vendor ID then set it to + CEC_VENDOR_ID_NONE. + + + __u32 + flags + Flags. No flags are defined yet, so set this to 0. + + + char + osd_name[15] + The On-Screen Display name as is returned by the + CEC_MSG_SET_OSD_NAME message. 
+ + + __u8 + primary_device_type[CEC_MAX_LOG_ADDRS] + Primary device type for each logical address. See + for possible types. + + + __u8 + log_addr_type[CEC_MAX_LOG_ADDRS] + Logical address types. See for + possible types. The driver will update this with the actual logical address + type that it claimed (e.g. it may have to fallback to + CEC_LOG_ADDR_TYPE_UNREGISTERED). + + + __u8 + all_device_types[CEC_MAX_LOG_ADDRS] + CEC 2.0 specific: all device types. See . + Used to implement the CEC_MSG_REPORT_FEATURES message. + This field is ignored if cec_version < + CEC_OP_CEC_VERSION_2_0. + + + __u8 + features[CEC_MAX_LOG_ADDRS][12] + Features for each logical address. Used to implement the + CEC_MSG_REPORT_FEATURES message. The 12 bytes include + both the RC Profile and the Device Features. + This field is ignored if cec_version < + CEC_OP_CEC_VERSION_2_0. + + + +
+ + + CEC Versions + + &cs-def; + + + CEC_OP_CEC_VERSION_1_3A + 4 + CEC version according to the HDMI 1.3a standard. + + + CEC_OP_CEC_VERSION_1_4B + 5 + CEC version according to the HDMI 1.4b standard. + + + CEC_OP_CEC_VERSION_2_0 + 6 + CEC version according to the HDMI 2.0 standard. + + + +
+ + + CEC Primary Device Types + + &cs-def; + + + CEC_OP_PRIM_DEVTYPE_TV + 0 + Use for a TV. + + + CEC_OP_PRIM_DEVTYPE_RECORD + 1 + Use for a recording device. + + + CEC_OP_PRIM_DEVTYPE_TUNER + 3 + Use for a device with a tuner. + + + CEC_OP_PRIM_DEVTYPE_PLAYBACK + 4 + Use for a playback device. + + + CEC_OP_PRIM_DEVTYPE_AUDIOSYSTEM + 5 + Use for an audio system (e.g. an audio/video receiver). + + + CEC_OP_PRIM_DEVTYPE_SWITCH + 6 + Use for a CEC switch. + + + CEC_OP_PRIM_DEVTYPE_VIDEOPROC + 7 + Use for a video processor device. + + + +
+ + + CEC Logical Address Types + + &cs-def; + + + CEC_LOG_ADDR_TYPE_TV + 0 + Use for a TV. + + + CEC_LOG_ADDR_TYPE_RECORD + 1 + Use for a recording device. + + + CEC_LOG_ADDR_TYPE_TUNER + 2 + Use for a tuner device. + + + CEC_LOG_ADDR_TYPE_PLAYBACK + 3 + Use for a playback device. + + + CEC_LOG_ADDR_TYPE_AUDIOSYSTEM + 4 + Use for an audio system device. + + + CEC_LOG_ADDR_TYPE_SPECIFIC + 5 + Use for a second TV or for a video processor device. + + + CEC_LOG_ADDR_TYPE_UNREGISTERED + 6 + Use this if you just want to remain unregistered. + Used for pure CEC switches or CDC-only devices (CDC: + Capability Discovery and Control). + + + +
+ + + CEC All Device Types Flags + + &cs-def; + + + CEC_OP_ALL_DEVTYPE_TV + 0x80 + This supports the TV type. + + + CEC_OP_ALL_DEVTYPE_RECORD + 0x40 + This supports the Recording type. + + + CEC_OP_ALL_DEVTYPE_TUNER + 0x20 + This supports the Tuner type. + + + CEC_OP_ALL_DEVTYPE_PLAYBACK + 0x10 + This supports the Playback type. + + + CEC_OP_ALL_DEVTYPE_AUDIOSYSTEM + 0x08 + This supports the Audio System type. + + + CEC_OP_ALL_DEVTYPE_SWITCH + 0x04 + This supports the CEC Switch or Video Processing type. + + + +
+
+ + + &return-value; + +
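To make the calling convention concrete, here is a sketch that claims a single
playback logical address; the constants are those from the tables above and
the OSD name is an arbitrary example:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/cec.h>

    static int claim_playback_address(int fd)
    {
            struct cec_log_addrs laddrs;

            memset(&laddrs, 0, sizeof(laddrs));
            laddrs.cec_version = CEC_OP_CEC_VERSION_2_0;
            laddrs.num_log_addrs = 1;
            laddrs.vendor_id = CEC_VENDOR_ID_NONE;
            strcpy(laddrs.osd_name, "Example"); /* at most 14 characters */
            laddrs.primary_device_type[0] = CEC_OP_PRIM_DEVTYPE_PLAYBACK;
            laddrs.log_addr_type[0] = CEC_LOG_ADDR_TYPE_PLAYBACK;
            laddrs.all_device_types[0] = CEC_OP_ALL_DEVTYPE_PLAYBACK;

            /* blocks until all requested logical addresses are claimed */
            return ioctl(fd, CEC_ADAP_S_LOG_ADDRS, &laddrs);
    }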
diff --git a/Documentation/DocBook/media/v4l/cec-ioc-adap-g-phys-addr.xml b/Documentation/DocBook/media/v4l/cec-ioc-adap-g-phys-addr.xml new file mode 100644 index 000000000000..d95f1785080c --- /dev/null +++ b/Documentation/DocBook/media/v4l/cec-ioc-adap-g-phys-addr.xml @@ -0,0 +1,86 @@ + + + ioctl CEC_ADAP_G_PHYS_ADDR, CEC_ADAP_S_PHYS_ADDR + &manvol; + + + + CEC_ADAP_G_PHYS_ADDR + CEC_ADAP_S_PHYS_ADDR + Get or set the physical address + + + + + + int ioctl + int fd + int request + __u16 *argp + + + + + + Arguments + + + + fd + + File descriptor returned by + open(). + + + + request + + CEC_ADAP_G_PHYS_ADDR, CEC_ADAP_S_PHYS_ADDR + + + + argp + + + + + + + + + Description + + + Note: this documents the proposed CEC API. This API is not yet finalized and + is currently only available as a staging kernel module. + + + To query the current physical address applications call the +CEC_ADAP_G_PHYS_ADDR ioctl with a pointer to an __u16 +where the driver stores the physical address. + + To set a new physical address applications store the physical address in +an __u16 and call the CEC_ADAP_S_PHYS_ADDR ioctl with a +pointer to this integer. CEC_ADAP_S_PHYS_ADDR is only +available if CEC_CAP_PHYS_ADDR is set (&ENOTTY; will be returned +otherwise). CEC_ADAP_S_PHYS_ADDR +can only be called by a file handle in initiator mode (see &CEC-S-MODE;), if not +&EBUSY; will be returned. + + The physical address is a 16-bit number where each group of 4 bits +represent a digit of the physical address a.b.c.d where the most significant +4 bits represent 'a'. The CEC root device (usually the TV) has address 0.0.0.0. +Every device that is hooked up to an input of the TV has address a.0.0.0 (where +'a' is ≥ 1), devices hooked up to those in turn have addresses a.b.0.0, etc. +So a topology of up to 5 devices deep is supported. The physical address a +device shall use is stored in the EDID of the sink. + +For example, the EDID for each HDMI input of the TV will have a different +physical address of the form a.0.0.0 that the sources will read out and use as +their physical address. + + + + &return-value; + + diff --git a/Documentation/DocBook/media/v4l/cec-ioc-dqevent.xml b/Documentation/DocBook/media/v4l/cec-ioc-dqevent.xml new file mode 100644 index 000000000000..697dde575cd4 --- /dev/null +++ b/Documentation/DocBook/media/v4l/cec-ioc-dqevent.xml @@ -0,0 +1,202 @@ + + + ioctl CEC_DQEVENT + &manvol; + + + + CEC_DQEVENT + Dequeue a CEC event + + + + + + int ioctl + int fd + int request + struct cec_event *argp + + + + + + Arguments + + + + fd + + File descriptor returned by + open(). + + + + request + + CEC_DQEVENT + + + + argp + + + + + + + + + Description + + + Note: this documents the proposed CEC API. This API is not yet finalized and + is currently only available as a staging kernel module. + + + CEC devices can send asynchronous events. These can be retrieved by calling + the CEC_DQEVENT ioctl. If the file descriptor is in non-blocking + mode and no event is pending, then it will return -1 and set errno to the &EAGAIN;. + + The internal event queues are per-filehandle and per-event type. If there is + no more room in a queue then the last event is overwritten with the new one. This + means that intermediate results can be thrown away but that the latest event is always + available. This also means that is it possible to read two successive events that have + the same value (e.g. two CEC_EVENT_STATE_CHANGE events with the same state). 
In that + case the intermediate state changes were lost but it is guaranteed that the state + did change in between the two events. + + + struct <structname>cec_event_state_change</structname> + + &cs-str; + + + __u16 + phys_addr + The current physical address. + + + __u16 + log_addr_mask + The current set of claimed logical addresses. + + + +
+ + + struct <structname>cec_event_lost_msgs</structname> + + &cs-str; + + + __u32 + lost_msgs + Set to the number of lost messages since the filehandle + was opened or since the last time this event was dequeued for + this filehandle. The messages lost are the oldest messages. So + when a new message arrives and there is no more room, then the + oldest message is discarded to make room for the new one. The + internal size of the message queue guarantees that all messages + received in the last two seconds will be stored. Since messages + should be replied to within a second according to the CEC + specification, this is more than enough. + + + + +
+ + + struct <structname>cec_event</structname> + + &cs-str; + + + __u64 + ts + Timestamp of the event in ns. + + + + __u32 + event + The CEC event type, see . + + + + __u32 + flags + Event flags, see . + + + + union + (anonymous) + + + + + + struct cec_event_state_change + state_change + The new adapter state as sent by the CEC_EVENT_STATE_CHANGE + event. + + + + struct cec_event_lost_msgs + lost_msgs + The number of lost messages as sent by the CEC_EVENT_LOST_MSGS + event. + + + +
+ + + CEC Events Types + + &cs-def; + + + CEC_EVENT_STATE_CHANGE + 1 + Generated when the CEC Adapter's state changes. When open() is + called an initial event will be generated for that filehandle with the + CEC Adapter's state at that time. + + + + CEC_EVENT_LOST_MSGS + 2 + Generated if one or more CEC messages were lost because the + application didn't dequeue CEC messages fast enough. + + + +
+ + + CEC Event Flags + + &cs-def; + + + CEC_EVENT_FL_INITIAL_VALUE + 1 + Set for the initial events that are generated when the device is + opened. See the table above for which events do this. This allows + applications to learn the initial state of the CEC adapter at open() + time. + + + +
+
+ + + &return-value; + +
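A sketch of an event-loop body built on this ioctl; the union members and
event codes are the ones listed above:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/cec.h>

    static void drain_one_event(int fd)
    {
            struct cec_event ev;

            /* fails with EAGAIN in non-blocking mode if the queue is empty */
            if (ioctl(fd, CEC_DQEVENT, &ev))
                    return;

            switch (ev.event) {
            case CEC_EVENT_STATE_CHANGE:
                    printf("physical address 0x%04x, claimed mask 0x%04x\n",
                           ev.state_change.phys_addr,
                           ev.state_change.log_addr_mask);
                    break;
            case CEC_EVENT_LOST_MSGS:
                    fprintf(stderr, "lost %u messages\n",
                            ev.lost_msgs.lost_msgs);
                    break;
            }
    }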
diff --git a/Documentation/DocBook/media/v4l/cec-ioc-g-mode.xml b/Documentation/DocBook/media/v4l/cec-ioc-g-mode.xml new file mode 100644 index 000000000000..26b4282ad134 --- /dev/null +++ b/Documentation/DocBook/media/v4l/cec-ioc-g-mode.xml @@ -0,0 +1,255 @@ + + + ioctl CEC_G_MODE, CEC_S_MODE + &manvol; + + + + CEC_G_MODE + CEC_S_MODE + Get or set exclusive use of the CEC adapter + + + + + + int ioctl + int fd + int request + __u32 *argp + + + + + + Arguments + + + + fd + + File descriptor returned by + open(). + + + + request + + CEC_G_MODE, CEC_S_MODE + + + + argp + + + + + + + + + Description + + + Note: this documents the proposed CEC API. This API is not yet finalized and + is currently only available as a staging kernel module. + + + By default any filehandle can use &CEC-TRANSMIT; and &CEC-RECEIVE;, but +in order to prevent applications from stepping on each others toes it must be possible +to obtain exclusive access to the CEC adapter. This ioctl sets the filehandle +to initiator and/or follower mode which can be exclusive depending on the chosen +mode. The initiator is the filehandle that is used +to initiate messages, i.e. it commands other CEC devices. The follower is the filehandle +that receives messages sent to the CEC adapter and processes them. The same filehandle +can be both initiator and follower, or this role can be taken by two different +filehandles. + + When a CEC message is received, then the CEC framework will decide how +it will be processed. If the message is a reply to an earlier transmitted message, +then the reply is sent back to the filehandle that is waiting for it. In addition +the CEC framework will process it. + + If the message is not a reply, then the CEC framework will process it +first. If there is no follower, then the message is just discarded and a feature +abort is sent back to the initiator if the framework couldn't process it. If there +is a follower, then the message is passed on to the follower who will use +&CEC-RECEIVE; to dequeue the new message. The framework expects the follower to +make the right decisions. + + The CEC framework will process core messages unless requested otherwise +by the follower. The follower can enable the passthrough mode. In that case, the +CEC framework will pass on most core messages without processing them and +the follower will have to implement those messages. There are some messages +that the core will always process, regardless of the passthrough mode. See + for details. + + If there is no initiator, then any CEC filehandle can use &CEC-TRANSMIT;. +If there is an exclusive initiator then only that initiator can call &CEC-TRANSMIT;. +The follower can of course always call &CEC-TRANSMIT;. + + Available initiator modes are: + + + Initiator Modes + + &cs-def; + + + CEC_MODE_NO_INITIATOR + 0x0 + This is not an initiator, i.e. it cannot transmit CEC messages + or make any other changes to the CEC adapter. + + + CEC_MODE_INITIATOR + 0x1 + This is an initiator (the default when the device is opened) and it + can transmit CEC messages and make changes to the CEC adapter, unless there + is an exclusive initiator. + + + CEC_MODE_EXCL_INITIATOR + 0x2 + This is an exclusive initiator and this file descriptor is the only one + that can transmit CEC messages and make changes to the CEC adapter. If someone + else is already the exclusive initiator then an attempt to become one will return + the &EBUSY; error. + + + +
 + + Available follower modes are: + + Follower Modes + + &cs-def; + + + CEC_MODE_NO_FOLLOWER + 0x00 + This is not a follower (the default when the device is opened). + + + CEC_MODE_FOLLOWER + 0x10 + This is a follower and it will receive CEC messages unless there is + an exclusive follower. You cannot become a follower if CEC_CAP_TRANSMIT + is not set or if CEC_MODE_NO_INITIATOR was specified, + &EINVAL; is returned in that case. + + + CEC_MODE_EXCL_FOLLOWER + 0x20 + This is an exclusive follower and only this file descriptor will receive + CEC messages for processing. If someone else is already the exclusive follower + then an attempt to become one will return the &EBUSY; error. You cannot become + a follower if CEC_CAP_TRANSMIT is not set or if + CEC_MODE_NO_INITIATOR was specified, &EINVAL; is returned + in that case. + + + CEC_MODE_EXCL_FOLLOWER_PASSTHRU + 0x30 + This is an exclusive follower and only this file descriptor will receive + CEC messages for processing. In addition it will put the CEC device into + passthrough mode, allowing the exclusive follower to handle most core messages + instead of relying on the CEC framework for that. If someone else is already the + exclusive follower then an attempt to become one will return the &EBUSY; error. + You cannot become a follower if CEC_CAP_TRANSMIT + is not set or if CEC_MODE_NO_INITIATOR was specified, + &EINVAL; is returned in that case. + + + CEC_MODE_MONITOR + 0xe0 + Put the file descriptor into monitor mode. Can only be used in combination + with CEC_MODE_NO_INITIATOR, otherwise &EINVAL; will be + returned. In monitor mode all messages this CEC device transmits and all messages + it receives (both broadcast messages and directed messages for one of its logical + addresses) will be reported. This is very useful for debugging. This is only + allowed if the process has the CAP_NET_ADMIN + capability. If that is not set, then &EPERM; is returned. + + + CEC_MODE_MONITOR_ALL + 0xf0 + Put the file descriptor into 'monitor all' mode. Can only be used in combination + with CEC_MODE_NO_INITIATOR, otherwise &EINVAL; will be + returned. In 'monitor all' mode all messages this CEC device transmits and all messages + it receives, including directed messages for other CEC devices, will be reported. This + is very useful for debugging, but not all devices support this. This mode requires that + the CEC_CAP_MONITOR_ALL capability is set, otherwise &EINVAL; is + returned. This is only allowed if the process has the CAP_NET_ADMIN + capability. If that is not set, then &EPERM; is returned. + +
 + + Core message processing details: + + Core Message Processing + + &cs-def; + + + CEC_MSG_GET_CEC_VERSION + When in passthrough mode this message has to be handled by userspace, + otherwise the core will return the CEC version that was set with &CEC-ADAP-S-LOG-ADDRS;. + + + CEC_MSG_GIVE_DEVICE_VENDOR_ID + When in passthrough mode this message has to be handled by userspace, + otherwise the core will return the vendor ID that was set with &CEC-ADAP-S-LOG-ADDRS;. + + + CEC_MSG_ABORT + When in passthrough mode this message has to be handled by userspace, + otherwise the core will return a feature refused message as per the specification. + + + CEC_MSG_GIVE_PHYSICAL_ADDR + When in passthrough mode this message has to be handled by userspace, + otherwise the core will report the current physical address. + + + CEC_MSG_GIVE_OSD_NAME + When in passthrough mode this message has to be handled by userspace, + otherwise the core will report the current OSD name as set with + &CEC-ADAP-S-LOG-ADDRS;. + + + CEC_MSG_GIVE_FEATURES + When in passthrough mode this message has to be handled by userspace, + otherwise the core will report the current features as set with + &CEC-ADAP-S-LOG-ADDRS;, or the message is ignored if the CEC version was + older than 2.0. + + + CEC_MSG_USER_CONTROL_PRESSED + If CEC_CAP_RC is set, then generate a remote control + key press. This message is always passed on to userspace. + + + CEC_MSG_USER_CONTROL_RELEASED + If CEC_CAP_RC is set, then generate a remote control + key release. This message is always passed on to userspace. + + + CEC_MSG_REPORT_PHYSICAL_ADDR + The CEC framework will make note of the reported physical address + and then just pass the message on to userspace. + +
+
+ + + &return-value; + +
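For example, a process that wants to both transmit messages and process
incoming ones could combine an initiator mode with a non-exclusive follower
mode; this is only a sketch of the call:

    #include <sys/ioctl.h>
    #include <linux/cec.h>

    static int become_follower(int fd)
    {
            __u32 mode = CEC_MODE_INITIATOR | CEC_MODE_FOLLOWER;

            return ioctl(fd, CEC_S_MODE, &mode);
    }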
diff --git a/Documentation/DocBook/media/v4l/cec-ioc-receive.xml b/Documentation/DocBook/media/v4l/cec-ioc-receive.xml new file mode 100644 index 000000000000..fde9f8678e67 --- /dev/null +++ b/Documentation/DocBook/media/v4l/cec-ioc-receive.xml @@ -0,0 +1,274 @@ + + + ioctl CEC_RECEIVE, CEC_TRANSMIT + &manvol; + + + + CEC_RECEIVE + CEC_TRANSMIT + Receive or transmit a CEC message + + + + + + int ioctl + int fd + int request + struct cec_msg *argp + + + + + + Arguments + + + + fd + + File descriptor returned by + open(). + + + + request + + CEC_RECEIVE, CEC_TRANSMIT + + + + argp + + + + + + + + + Description + + + Note: this documents the proposed CEC API. This API is not yet finalized and + is currently only available as a staging kernel module. + + + To receive a CEC message the application has to fill in the + cec_msg structure and pass it to the + CEC_RECEIVE ioctl. CEC_RECEIVE is + only available if CEC_CAP_RECEIVE is set. If the + file descriptor is in non-blocking mode and there are no received + messages pending, then it will return -1 and set errno to the &EAGAIN;. + If the file descriptor is in blocking mode and timeout + is non-zero and no message arrived within timeout + milliseconds, then it will return -1 and set errno to the &ETIMEDOUT;. + + To send a CEC message the application has to fill in the + cec_msg structure and pass it to the + CEC_TRANSMIT ioctl. CEC_TRANSMIT is + only available if CEC_CAP_TRANSMIT is set. + If there is no more room in the transmit queue, then it will return + -1 and set errno to the &EBUSY;. + + + struct <structname>cec_msg</structname> + + &cs-str; + + + __u64 + ts + Timestamp of when the message was transmitted in ns in the case + of CEC_TRANSMIT with reply + set to 0, or the timestamp of the received message in all other cases. + + + __u32 + len + The length of the message. For CEC_TRANSMIT this + is filled in by the application. The driver will fill this in for + CEC_RECEIVE and for CEC_TRANSMIT + it will be filled in with the length of the reply message if + reply was set. + + + __u32 + timeout + The timeout in milliseconds. This is the time the device will wait for a message to + be received before timing out. If it is set to 0, then it will wait indefinitely when it + is called by CEC_RECEIVE. If it is 0 and it is called by + CEC_TRANSMIT, then it will be replaced by 1000 if the + reply is non-zero or ignored if reply + is 0. + + + __u32 + sequence + The sequence number is automatically assigned by the CEC + framework for all transmitted messages. It can be later used by the + framework to generate an event if a reply for a message was + requested and the message was transmitted in a non-blocking mode. + + + + __u32 + flags + Flags. No flags are defined yet, so set this to 0. + + + __u8 + rx_status + The status bits of the received message. See + for the possible status values. It is 0 if this message was transmitted, not + received, unless this is the reply to a transmitted message. In that case both + rx_status and tx_status + are set. + + + __u8 + tx_status + The status bits of the transmitted message. See + for the possible status values. It is 0 if this messages was received, not + transmitted. + + + __u8 + msg[16] + The message payload. For CEC_TRANSMIT this + is filled in by the application. The driver will fill this in for + CEC_RECEIVE and for CEC_TRANSMIT + it will be filled in with the payload of the reply message if + reply was set. + + + __u8 + reply + Wait until this message is replied. 
If reply + is 0 and the timeout is 0, then don't wait for a reply but + return after transmitting the message. If there was an error as indicated by a non-zero + tx_status field, then reply and + timeout are both set to 0 by the driver. Ignored by + CEC_RECEIVE. The case where reply is 0 + (this is the opcode for the Feature Abort message) and timeout + is non-zero is specifically allowed to send a message and wait up to timeout + milliseconds for a Feature Abort reply. In this case rx_status + will either be set to CEC_RX_STATUS_TIMEOUT or + CEC_RX_STATUS_FEATURE_ABORT. + + + __u8 + tx_arb_lost_cnt + A counter of the number of transmit attempts that resulted in the + Arbitration Lost error. This is only set if the hardware supports this, otherwise + it is always 0. This counter is only valid if the CEC_TX_STATUS_ARB_LOST + status bit is set. + + + __u8 + tx_nack_cnt + A counter of the number of transmit attempts that resulted in the + Not Acknowledged error. This is only set if the hardware supports this, otherwise + it is always 0. This counter is only valid if the CEC_TX_STATUS_NACK + status bit is set. + + + __u8 + tx_low_drive_cnt + A counter of the number of transmit attempts that resulted in the + Low Drive error. This is only set if the hardware supports this, otherwise + it is always 0. This counter is only valid if the CEC_TX_STATUS_LOW_DRIVE + status bit is set. + + + __u8 + tx_error_cnt + A counter of the number of transmit errors other than Arbitration Lost + or Not Acknowledged. This is only set if the hardware supports this, otherwise + it is always 0. This counter is only valid if the CEC_TX_STATUS_ERROR + status bit is set. + +
+ + + CEC Transmit Status + + &cs-def; + + + CEC_TX_STATUS_OK + 0x01 + The message was transmitted successfully. This is mutually exclusive with + CEC_TX_STATUS_MAX_RETRIES. Other bits can still be set if + earlier attempts met with failure before the transmit was eventually successful. + + + CEC_TX_STATUS_ARB_LOST + 0x02 + CEC line arbitration was lost. + + + CEC_TX_STATUS_NACK + 0x04 + Message was not acknowledged. + + + CEC_TX_STATUS_LOW_DRIVE + 0x08 + Low drive was detected on the CEC bus. This indicates that a follower + detected an error on the bus and requests a retransmission. + + + CEC_TX_STATUS_ERROR + 0x10 + Some error occurred. This is used for any errors that do not + fit the previous two, either because the hardware could not tell + which error occurred, or because the hardware tested for other conditions + besides those two. + + + CEC_TX_STATUS_MAX_RETRIES + 0x20 + The transmit failed after one or more retries. This status bit is mutually + exclusive with CEC_TX_STATUS_OK. Other bits can still be set + to explain which failures were seen. + + + +
+ + + CEC Receive Status + + &cs-def; + + + CEC_RX_STATUS_OK + 0x01 + The message was received successfully. + + + CEC_RX_STATUS_TIMEOUT + 0x02 + The reply to an earlier transmitted message timed out. + + + CEC_RX_STATUS_FEATURE_ABORT + 0x04 + The message was received successfully but the reply was + CEC_MSG_FEATURE_ABORT. This status is only + set if this message was the reply to an earlier transmitted + message. + + + +
+
+ + + &return-value; + +
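Since the API above is still a proposed staging interface, the following C sketch is illustrative only: the uapi header name (linux/cec.h), the device node (/dev/cec0), the example logical addresses, and the use of opcodes 0x8f/0x90 (Give Device Power Status / Report Power Status) are all assumptions, not settled interfaces.

    /* Hedged sketch: transmit a CEC message and wait for its reply,
     * following the CEC_TRANSMIT description above. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/cec.h>          /* assumed header location */

    int main(void)
    {
            struct cec_msg msg;
            int fd = open("/dev/cec0", O_RDWR);   /* assumed device node */

            if (fd < 0)
                    return 1;

            memset(&msg, 0, sizeof(msg));   /* flags must be 0 for now */
            msg.msg[0] = 0x40;              /* initiator 4, destination 0 (example) */
            msg.msg[1] = 0x8f;              /* Give Device Power Status */
            msg.len = 2;
            msg.reply = 0x90;               /* wait for Report Power Status */
            msg.timeout = 1000;             /* give up after 1000 ms */

            if (ioctl(fd, CEC_TRANSMIT, &msg) < 0)
                    perror("CEC_TRANSMIT");
            else if (msg.rx_status & CEC_RX_STATUS_OK)
                    printf("power status operand: 0x%02x\n", msg.msg[2]);

            close(fd);
            return 0;
    }

Note how the non-zero reply/timeout pair selects the wait-for-reply behaviour documented in the table above, and how both rx_status and tx_status are meaningful once the reply arrives.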
diff --git a/Documentation/DocBook/media/v4l/io.xml b/Documentation/DocBook/media/v4l/io.xml index e09025db92bd..21a3dde8f95d 100644 --- a/Documentation/DocBook/media/v4l/io.xml +++ b/Documentation/DocBook/media/v4l/io.xml @@ -88,7 +88,7 @@ function. capabilities field of &v4l2-capability; returned by the &VIDIOC-QUERYCAP; ioctl is set. There are two streaming methods, to determine if the memory mapping flavor is -supported applications must call the &VIDIOC-REQBUFS; ioctl. +supported applications must call the &VIDIOC-REQBUFS; ioctl with the memory type set to V4L2_MEMORY_MMAP. Streaming is an I/O method where only pointers to buffers are exchanged between application and driver, the data itself is not @@ -369,7 +369,7 @@ rest should be evident. capabilities field of &v4l2-capability; returned by the &VIDIOC-QUERYCAP; ioctl is set. If the particular user pointer method (not only memory mapping) is supported must be -determined by calling the &VIDIOC-REQBUFS; ioctl. +determined by calling the &VIDIOC-REQBUFS; ioctl with the memory type set to V4L2_MEMORY_USERPTR. This I/O method combines advantages of the read/write and memory mapping methods. Buffers (planes) are allocated by the application diff --git a/Documentation/DocBook/media/v4l/lirc_device_interface.xml b/Documentation/DocBook/media/v4l/lirc_device_interface.xml index 34cada2ca710..71f9dbb81ec7 100644 --- a/Documentation/DocBook/media/v4l/lirc_device_interface.xml +++ b/Documentation/DocBook/media/v4l/lirc_device_interface.xml @@ -157,7 +157,7 @@ on working with the default settings initially. LIRC_SET_{SEND,REC}_CARRIER - Set send/receive carrier (in Hz). + Set send/receive carrier (in Hz). Return 0 on success. diff --git a/Documentation/DocBook/media/v4l/media-types.xml b/Documentation/DocBook/media/v4l/media-types.xml index 5e3f20fdcf17..95aa1f9c836a 100644 --- a/Documentation/DocBook/media/v4l/media-types.xml +++ b/Documentation/DocBook/media/v4l/media-types.xml @@ -121,6 +121,70 @@ MEDIA_ENT_F_AUDIO_MIXER Audio Mixer Function Entity. + + MEDIA_ENT_F_PROC_VIDEO_COMPOSER + Video composer (blender). An entity capable of video + composing must have at least two sink pads and one source + pad, and composes input video frames onto output video + frames. Composition can be performed using alpha blending, + color keying, raster operations (ROP), stitching or any other + means. + + + + MEDIA_ENT_F_PROC_VIDEO_PIXEL_FORMATTER + Video pixel formatter. An entity capable of pixel formatting + must have at least one sink pad and one source pad. Read + pixel formatters read pixels from memory and perform a subset + of unpacking, cropping, color keying, alpha multiplication + and pixel encoding conversion. Write pixel formatters perform + a subset of dithering, pixel encoding conversion and packing + and write pixels to memory. + + + + MEDIA_ENT_F_PROC_VIDEO_PIXEL_ENC_CONV + Video pixel encoding converter. An entity capable of pixel + encoding conversion must have at least one sink pad and one + source pad, and convert the encoding of pixels received on + its sink pad(s) to a different encoding output on its source + pad(s). Pixel encoding conversion includes but isn't limited + to RGB to/from HSV, RGB to/from YUV and CFA (Bayer) to RGB + conversions. + + + + MEDIA_ENT_F_PROC_VIDEO_LUT + Video look-up table. An entity capable of video lookup table + processing must have one sink pad and one source pad. It uses + the values of the pixels received on its sink pad to look up + entries in internal tables and output them on its source pad.
+ The lookup processing can be performed on all components + separately or combine them for multi-dimensional table + lookups. + + + + MEDIA_ENT_F_PROC_VIDEO_SCALER + Video scaler. An entity capable of video scaling must have + at least one sink pad and one source pad, and scale the + video frame(s) received on its sink pad(s) to a different + resolution output on its source pad(s). The range of + supported scaling ratios is entity-specific and can differ + between the horizontal and vertical directions (in particular + scaling can be supported in one direction only). Binning and + skipping are considered as scaling. + + + + MEDIA_ENT_F_PROC_VIDEO_STATISTICS + Video statistics computation (histogram, 3A, ...). An entity + capable of statistics computation must have one sink pad and + one source pad. It computes statistics over the frames + received on its sink pad and outputs the statistics data on + its source pad. + + diff --git a/Documentation/DocBook/media/v4l/pixfmt-z16.xml b/Documentation/DocBook/media/v4l/pixfmt-z16.xml index 3d87e4bf87b8..1d9cb1684bd3 100644 --- a/Documentation/DocBook/media/v4l/pixfmt-z16.xml +++ b/Documentation/DocBook/media/v4l/pixfmt-z16.xml @@ -5,7 +5,7 @@ V4L2_PIX_FMT_Z16 - Interleaved grey-scale image, e.g. from a stereo-pair + 16-bit depth data with distance values at each pixel Description diff --git a/Documentation/DocBook/media/v4l/vidioc-reqbufs.xml b/Documentation/DocBook/media/v4l/vidioc-reqbufs.xml index 0f193fda0470..6f529e100ea4 100644 --- a/Documentation/DocBook/media/v4l/vidioc-reqbufs.xml +++ b/Documentation/DocBook/media/v4l/vidioc-reqbufs.xml @@ -6,7 +6,7 @@ VIDIOC_REQBUFS - Initiate Memory Mapping or User Pointer I/O + Initiate Memory Mapping, User Pointer or DMA Buffer I/O diff --git a/Documentation/DocBook/media_api.tmpl b/Documentation/DocBook/media_api.tmpl index 7b77e0f7b87d..a2765d8ad05c 100644 --- a/Documentation/DocBook/media_api.tmpl +++ b/Documentation/DocBook/media_api.tmpl @@ -75,7 +75,7 @@ The media infrastructure API was designed to control such - devices. It is divided into four parts. + devices. It is divided into five parts. The first part covers radio, video capture and output, cameras, analog TV devices and codecs. The second part covers the @@ -87,6 +87,7 @@ . The third part covers the Remote Controller API. The fourth part covers the Media Controller API. + The fifth part covers the CEC (Consumer Electronics Control) API. It should also be noted that a media device may also have audio components, like mixers, PCM capture, PCM playback, etc, which are controlled via ALSA API. @@ -107,6 +108,9 @@ &sub-media-controller; + +&sub-cec-api; + &sub-gen-errors; diff --git a/Documentation/Makefile.sphinx b/Documentation/Makefile.sphinx new file mode 100644 index 000000000000..d8d13c92a178 --- /dev/null +++ b/Documentation/Makefile.sphinx @@ -0,0 +1,78 @@ +# -*- makefile -*- +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXBUILD = sphinx-build +SPHINXOPTS = +PAPER = +BUILDDIR = $(obj)/output + +# User-friendly check for sphinx-build +HAVE_SPHINX := $(shell if which $(SPHINXBUILD) >/dev/null 2>&1; then echo 1; else echo 0; fi) + +ifeq ($(HAVE_SPHINX),0) + +.DEFAULT: + $(warning The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed and in PATH, or set the SPHINXBUILD make variable to point to the full path of the '$(SPHINXBUILD)' executable.) + @echo " SKIP Sphinx $@ target." 
+ +else ifneq ($(DOCBOOKS),) + +# Skip Sphinx build if the user explicitly requested DOCBOOKS. +.DEFAULT: + @echo " SKIP Sphinx $@ target (DOCBOOKS specified)." + +else # HAVE_SPHINX + +# User-friendly check for rst2pdf +HAVE_RST2PDF := $(shell if python -c "import rst2pdf" >/dev/null 2>&1; then echo 1; else echo 0; fi) + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +KERNELDOC = $(srctree)/scripts/kernel-doc +KERNELDOC_CONF = -D kerneldoc_srctree=$(srctree) -D kerneldoc_bin=$(KERNELDOC) +ALLSPHINXOPTS = -D version=$(KERNELVERSION) -D release=$(KERNELRELEASE) -d $(BUILDDIR)/.doctrees $(KERNELDOC_CONF) $(PAPEROPT_$(PAPER)) -c $(srctree)/$(src) $(SPHINXOPTS) $(srctree)/$(src) +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +quiet_cmd_sphinx = SPHINX $@ + cmd_sphinx = $(SPHINXBUILD) -b $2 $(ALLSPHINXOPTS) $(BUILDDIR)/$2 + +htmldocs: + $(call cmd,sphinx,html) + +pdfdocs: +ifeq ($(HAVE_RST2PDF),0) + $(warning The Python 'rst2pdf' module was not found. Make sure you have the module installed to produce PDF output.) + @echo " SKIP Sphinx $@ target." +else # HAVE_RST2PDF + $(call cmd,sphinx,pdf) +endif # HAVE_RST2PDF + +epubdocs: + $(call cmd,sphinx,epub) + +xmldocs: + $(call cmd,sphinx,xml) + +# no-ops for the Sphinx toolchain +sgmldocs: +psdocs: +mandocs: +installmandocs: +cleanmediadocs: + +cleandocs: + $(Q)rm -rf $(BUILDDIR) + +dochelp: + @echo ' Linux kernel internal documentation in different formats (Sphinx):' + @echo ' htmldocs - HTML' + @echo ' pdfdocs - PDF' + @echo ' epubdocs - EPUB' + @echo ' xmldocs - XML' + @echo ' cleandocs - clean all generated files' + +endif # HAVE_SPHINX diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html index e7e24b3e86e2..ece410f40436 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.html +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -2391,6 +2391,41 @@ and RCU_NONIDLE() on the other while inspecting idle-loop code. Steven Rostedt supplied _rcuidle event tracing, which is used quite heavily in the idle loop. +However, there are some restrictions on the code placed within +RCU_NONIDLE(): + +
    +
  1. Blocking is prohibited. + In practice, this is not a serious restriction given that idle + tasks are prohibited from blocking to begin with. +
  2. Although nesting of RCU_NONIDLE() is permitted, these invocations cannot + nest indefinitely deeply. + However, given that they can be nested on the order of a million + deep, even on 32-bit systems, this should not be a serious + restriction. + This nesting limit would probably be reached long after the + compiler OOMed or the stack overflowed. +
  3. Any code path that enters RCU_NONIDLE() must sequence + out of that same RCU_NONIDLE(). + For example, the following is grossly illegal: + +
    +
    + 1     RCU_NONIDLE({
    + 2       do_something();
    + 3       goto bad_idea;  /* BUG!!! */
    + 4       do_something_else();});
    + 5   bad_idea:
    +	
    +
    + +

    + It is just as illegal to transfer control into the middle of + RCU_NONIDLE()'s argument. + Yes, in theory, you could transfer in as long as you also + transferred out, but in practice you could also expect to get sharply + worded review comments. +
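    For contrast, the following sketch (using the same placeholder
    do_something() helpers as the illegal example above) is perfectly
    fine, because control both enters and leaves through
    RCU_NONIDLE() itself:

 1   RCU_NONIDLE({
 2     do_something();          /* may use RCU readers */
 3     do_something_else();});  /* no blocking, no jumps in or out */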

It is similarly socially unacceptable to interrupt an diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 0f7fb4298e7e..e93d04133fe7 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -49,7 +49,7 @@ rcupdate.rcu_task_stall_timeout This boot/sysfs parameter controls the RCU-tasks stall warning interval. A value of zero or less suppresses RCU-tasks stall warnings. A positive value sets the stall-warning interval - in jiffies. An RCU-tasks stall warning starts wtih the line: + in jiffies. An RCU-tasks stall warning starts with the line: INFO: rcu_tasks detected stalls on tasks: diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 111770ffa10e..204422719197 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -5,6 +5,9 @@ to start learning about RCU: 2. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/ 3. RCU part 3: the RCU API http://lwn.net/Articles/264090/ 4. The RCU API, 2010 Edition http://lwn.net/Articles/418853/ + 2010 Big API Table http://lwn.net/Articles/419086/ +5. The RCU API, 2014 Edition http://lwn.net/Articles/609904/ + 2014 Big API Table http://lwn.net/Articles/609973/ What is RCU? diff --git a/Documentation/acpi/aml-debugger.txt b/Documentation/acpi/aml-debugger.txt new file mode 100644 index 000000000000..5f62aa4a493b --- /dev/null +++ b/Documentation/acpi/aml-debugger.txt @@ -0,0 +1,66 @@ +The AML Debugger + +Copyright (C) 2016, Intel Corporation +Author: Lv Zheng + + +This document describes the usage of the AML debugger embedded in the Linux +kernel. + +1. Build the debugger + + The following kernel configuration items are required to enable the AML + debugger interface from the Linux kernel: + + CONFIG_ACPI_DEBUGGER=y + CONFIG_ACPI_DEBUGGER_USER=m + + The userspace utilities can be built from the kernel source tree using + the following commands: + + $ cd tools + $ make acpi + + The resultant userspace tool binary is then located at: + + tools/acpi/power/acpi/acpidbg/acpidbg + + It can be installed to system directories by running "make install" (as a + sufficiently privileged user). + +2. Start the userspace debugger interface + + After booting the kernel with the debugger built-in, the debugger can be + started by using the following commands: + + # mount -t debugfs none /sys/kernel/debug + # modprobe acpi_dbg + # tools/acpi/power/acpi/acpidbg/acpidbg + + That spawns the interactive AML debugger environment where you can execute + debugger commands. + + The commands are documented in the "ACPICA Overview and Programmer Reference" + that can be downloaded from + + https://acpica.org/documentation + + The detailed debugger commands reference is located in Chapter 12 "ACPICA + Debugger Reference". The "help" command can be used for a quick reference. + +3. Stop the userspace debugger interface + + The interactive debugger interface can be closed by pressing Ctrl+C or using + the "quit" or "exit" commands. When finished, unload the module with: + + # rmmod acpi_dbg + + The module unloading may fail if there is an acpidbg instance running. + +4. Run the debugger in a script + + It may be useful to run the AML debugger in a test script. "acpidbg" supports + this in a special "batch" mode.
For example, the following command outputs + the entire ACPI namespace: + + # acpidbg -b "namespace" diff --git a/Documentation/acpi/linuxized-acpica.txt b/Documentation/acpi/linuxized-acpica.txt new file mode 100644 index 000000000000..defe2eec5331 --- /dev/null +++ b/Documentation/acpi/linuxized-acpica.txt @@ -0,0 +1,262 @@ +Linuxized ACPICA - Introduction to ACPICA Release Automation + +Copyright (C) 2013-2016, Intel Corporation +Author: Lv Zheng + + +Abstract: + +This document describes the ACPICA project and the relationship between +ACPICA and Linux. It also describes how ACPICA code in drivers/acpi/acpica, +include/acpi and tools/power/acpi is automatically updated to follow the +upstream. + + +1. ACPICA Project + + The ACPI Component Architecture (ACPICA) project provides an operating + system (OS)-independent reference implementation of the Advanced + Configuration and Power Interface Specification (ACPI). It has been + adapted by various host OSes. By directly integrating ACPICA, Linux can + also benefit from the application experiences of ACPICA from other host + OSes. + + The homepage of ACPICA project is: www.acpica.org, it is maintained and + supported by Intel Corporation. + + The following figure depicts the Linux ACPI subystem where the ACPICA + adaptation is included: + + +---------------------------------------------------------+ + | | + | +---------------------------------------------------+ | + | | +------------------+ | | + | | | Table Management | | | + | | +------------------+ | | + | | +----------------------+ | | + | | | Namespace Management | | | + | | +----------------------+ | | + | | +------------------+ ACPICA Components | | + | | | Event Management | | | + | | +------------------+ | | + | | +---------------------+ | | + | | | Resource Management | | | + | | +---------------------+ | | + | | +---------------------+ | | + | | | Hardware Management | | | + | | +---------------------+ | | + | +---------------------------------------------------+ | | + | | | +------------------+ | | | + | | | | OS Service Layer | | | | + | | | +------------------+ | | | + | | +-------------------------------------------------|-+ | + | | +--------------------+ | | + | | | Device Enumeration | | | + | | +--------------------+ | | + | | +------------------+ | | + | | | Power Management | | | + | | +------------------+ Linux/ACPI Components | | + | | +--------------------+ | | + | | | Thermal Management | | | + | | +--------------------+ | | + | | +--------------------------+ | | + | | | Drivers for ACPI Devices | | | + | | +--------------------------+ | | + | | +--------+ | | + | | | ...... | | | + | | +--------+ | | + | +---------------------------------------------------+ | + | | + +---------------------------------------------------------+ + + Figure 1. Linux ACPI Software Components + + NOTE: + A. OS Service Layer - Provided by Linux to offer OS dependent + implementation of the predefined ACPICA interfaces (acpi_os_*). + include/acpi/acpiosxf.h + drivers/acpi/osl.c + include/acpi/platform + include/asm/acenv.h + B. ACPICA Functionality - Released from ACPICA code base to offer + OS independent implementation of the ACPICA interfaces (acpi_*). + drivers/acpi/acpica + include/acpi/ac*.h + tools/power/acpi + C. Linux/ACPI Functionality - Providing Linux specific ACPI + functionality to the other Linux kernel subsystems and user space + programs. + drivers/acpi + include/linux/acpi.h + include/linux/acpi*.h + include/acpi + tools/power/acpi + D. 
Architecture Specific ACPICA/ACPI Functionalities - Provided by the + ACPI subsystem to offer architecture specific implementation of the + ACPI interfaces. They are Linux specific components and are out of + the scope of this document. + include/asm/acpi.h + include/asm/acpi*.h + arch/*/acpi + +2. ACPICA Release + + The ACPICA project maintains its code base at the following repository URL: + https://github.com/acpica/acpica.git. As a rule, a release is made every + month. + + As the coding style adopted by the ACPICA project is not acceptable by + Linux, there is a release process to convert the ACPICA git commits into + Linux patches. The patches generated by this process are referred to as + "linuxized ACPICA patches". The release process is carried out on a local + copy the ACPICA git repository. Each commit in the monthly release is + converted into a linuxized ACPICA patch. Together, they form the montly + ACPICA release patchset for the Linux ACPI community. This process is + illustrated in the following figure: + + +-----------------------------+ + | acpica / master (-) commits | + +-----------------------------+ + /|\ | + | \|/ + | /---------------------\ +----------------------+ + | < Linuxize repo Utility >-->| old linuxized acpica |--+ + | \---------------------/ +----------------------+ | + | | + /---------\ | + < git reset > \ + \---------/ \ + /|\ /+-+ + | / | + +-----------------------------+ | | + | acpica / master (+) commits | | | + +-----------------------------+ | | + | | | + \|/ | | + /-----------------------\ +----------------------+ | | + < Linuxize repo Utilities >-->| new linuxized acpica |--+ | + \-----------------------/ +----------------------+ | + \|/ + +--------------------------+ /----------------------\ + | Linuxized ACPICA Patches |<----------------< Linuxize patch Utility > + +--------------------------+ \----------------------/ + | + \|/ + /---------------------------\ + < Linux ACPI Community Review > + \---------------------------/ + | + \|/ + +-----------------------+ /------------------\ +----------------+ + | linux-pm / linux-next |-->< Linux Merge Window >-->| linux / master | + +-----------------------+ \------------------/ +----------------+ + + Figure 2. ACPICA -> Linux Upstream Process + + NOTE: + A. Linuxize Utilities - Provided by the ACPICA repository, including a + utility located in source/tools/acpisrc folder and a number of + scripts located in generate/linux folder. + B. acpica / master - "master" branch of the git repository at + . + C. linux-pm / linux-next - "linux-next" branch of the git repository at + . + D. linux / master - "master" branch of the git repository at + . + + Before the linuxized ACPICA patches are sent to the Linux ACPI community + for review, there is a quality ensurance build test process to reduce + porting issues. Currently this build process only takes care of the + following kernel configuration options: + CONFIG_ACPI/CONFIG_ACPI_DEBUG/CONFIG_ACPI_DEBUGGER + +3. ACPICA Divergences + + Ideally, all of the ACPICA commits should be converted into Linux patches + automatically without manual modifications, the "linux / master" tree should + contain the ACPICA code that exactly corresponds to the ACPICA code + contained in "new linuxized acpica" tree and it should be possible to run + the release process fully automatically. + + As a matter of fact, however, there are source code differences between + the ACPICA code in Linux and the upstream ACPICA code, referred to as + "ACPICA Divergences". 
+ + The various sources of ACPICA divergences include: + 1. Legacy divergences - Before the current ACPICA release process was + established, there already had been divergences between Linux and + ACPICA. Over the past several years those divergences have been greatly + reduced, but several still remain and it takes time to figure + out the underlying reasons for their existence. + 2. Manual modifications - Any manual modification (e.g. coding style fixes) + made directly in the Linux sources obviously hurts the ACPICA release + automation. Thus it is recommended to fix such issues in the ACPICA + upstream source code and generate the linuxized fix using the ACPICA + release utilities (please refer to Section 4 below for the details). + 3. Linux specific features - Sometimes it's impossible to use the + current ACPICA APIs to implement features required by the Linux kernel, + so Linux developers occasionally have to change ACPICA code directly. + Those changes may not be acceptable to ACPICA upstream and in such cases + they are left as committed ACPICA divergences unless the ACPICA side can + implement new mechanisms as replacements for them. + 4. ACPICA release fixups - ACPICA only tests commits using a set of the + user space simulation utilities, thus the linuxized ACPICA patches may + break the Linux kernel, leaving us with build/boot failures. In order to + avoid breaking Linux bisection, fixes are applied directly to the + linuxized ACPICA patches during the release process. When the release + fixups are backported to the upstream ACPICA sources, they must follow + the upstream ACPICA rules and so further modifications may appear. + That may result in the appearance of new divergences. + 5. Fast tracking of ACPICA commits - Some ACPICA commits are regression + fixes or stable-candidate material, so they are applied in advance with + respect to the ACPICA release process. If such commits are reverted or + rebased on the ACPICA side in order to offer better solutions, new ACPICA + divergences are generated. + +4. ACPICA Development + + This section guides Linux developers in using the ACPICA upstream release + utilities to obtain Linux patches corresponding to upstream ACPICA commits + before they become available from the ACPICA release process. + + 1. Cherry-pick an ACPICA commit + + First you need to git clone the ACPICA repository, and the ACPICA change + you want to cherry-pick must be committed into the local repository. + + Then the gen-patch.sh command can help to cherry-pick an ACPICA commit + from the ACPICA local repository: + + $ git clone https://github.com/acpica/acpica + $ cd acpica + $ generate/linux/gen-patch.sh -u [commit ID] + + Here the commit ID is the ACPICA local repository commit ID you want to + cherry-pick. It can be omitted if the commit is "HEAD". + + 2. Cherry-pick recent ACPICA commits + + Sometimes you need to rebase your code on top of the most recent ACPICA + changes that haven't been applied to Linux yet. + + You can generate the ACPICA release series yourself and rebase your code on + top of the generated ACPICA release patches: + + $ git clone https://github.com/acpica/acpica + $ cd acpica + $ generate/linux/make-patches.sh -u [commit ID] + + The commit ID should be the last ACPICA commit accepted by Linux. Usually, + it is the commit modifying ACPI_CA_VERSION. It can be found by executing + "git blame source/include/acpixf.h" and referencing the line that contains + "ACPI_CA_VERSION". + + 3.
Inspect the current divergences + + If you have local copies of both Linux and upstream ACPICA, you can generate + a diff file indicating the state of the current divergences: + + # git clone https://github.com/acpica/acpica + # git clone http://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + # cd acpica + # generate/linux/divergences.sh -s ../linux diff --git a/Documentation/acpi/ssdt-overlays.txt b/Documentation/acpi/ssdt-overlays.txt new file mode 100644 index 000000000000..5ae13f161ea2 --- /dev/null +++ b/Documentation/acpi/ssdt-overlays.txt @@ -0,0 +1,172 @@ + +In order to support ACPI open-ended hardware configurations (e.g. development +boards) we need a way to augment the ACPI configuration provided by the firmware +image. A common example is connecting sensors on I2C / SPI buses on development +boards. + +Although this can be accomplished by creating a kernel platform driver or +recompiling the firmware image with updated ACPI tables, neither is practical: +the former proliferates board specific kernel code while the latter requires +access to firmware tools which are often not publicly available. + +Because ACPI supports external references in AML code a more practical +way to augment firmware ACPI configuration is by dynamically loading +user defined SSDT tables that contain the board specific information. + +For example, to enumerate a Bosch BMA222E accelerometer on the I2C bus of the +Minnowboard MAX development board exposed via the LSE connector [1], the +following ASL code can be used: + +DefinitionBlock ("minnowmax.aml", "SSDT", 1, "Vendor", "Accel", 0x00000003)
{
    External (\_SB.I2C6, DeviceObj)

    Scope (\_SB.I2C6)
    {
        Device (STAC)
        {
            Name (_ADR, Zero)
            Name (_HID, "BMA222E")

            Method (_CRS, 0, Serialized)
            {
                Name (RBUF, ResourceTemplate ()
                {
                    I2cSerialBus (0x0018, ControllerInitiated, 0x00061A80,
                        AddressingMode7Bit, "\\_SB.I2C6", 0x00,
                        ResourceConsumer, ,)
                    GpioInt (Edge, ActiveHigh, Exclusive, PullDown, 0x0000,
                        "\\_SB.GPO2", 0x00, ResourceConsumer, , )
                    { // Pin list
                        0
                    }
                })
                Return (RBUF)
            }
        }
    }
}

which can then be compiled to AML binary format:

$ iasl minnowmax.asl

Intel ACPI Component Architecture
ASL Optimizing Compiler version 20140214-64 [Mar 29 2014]
Copyright (c) 2000 - 2014 Intel Corporation

ASL Input: minnowmax.asl - 30 lines, 614 bytes, 7 keywords
AML Output: minnowmax.aml - 165 bytes, 6 named objects, 1 executable opcodes

[1] http://wiki.minnowboard.org/MinnowBoard_MAX#Low_Speed_Expansion_Connector_.28Top.29

The resulting AML code can then be loaded by the kernel using one of the methods
below.

== Loading ACPI SSDTs from initrd ==

This option allows loading of user defined SSDTs from initrd and it is useful
when the system does not support EFI or when there is not enough EFI storage.

It works in a similar way to initrd based ACPI tables override/upgrade: the SSDT
aml code must be placed in the first, uncompressed, initrd under the
"kernel/firmware/acpi" path. Multiple files can be used and this translates
into loading multiple tables. Only SSDT and OEM tables are allowed. See
initrd_table_override.txt for more details.

Here is an example:

# Add the raw ACPI tables to an uncompressed cpio archive.
# They must be put into a /kernel/firmware/acpi directory inside the
# cpio archive.
# The uncompressed cpio archive must be the first.
# Other, typically compressed cpio archives, must be
# concatenated on top of the uncompressed one.
+mkdir -p kernel/firmware/acpi +cp ssdt.aml kernel/firmware/acpi + +# Create the uncompressed cpio archive and concatenate the original initrd +# on top: +find kernel | cpio -H newc --create > /boot/instrumented_initrd +cat /boot/initrd >>/boot/instrumented_initrd + +== Loading ACPI SSDTs from EFI variables == + +This is the preferred method, when EFI is supported on the platform, because it +allows a persistent, OS independent way of storing the user defined SSDTs. There +is also work underway to implement EFI support for loading user defined SSDTs +and using this method will make it easier to convert to the EFI loading +mechanism when it arrives. + +In order to load SSDTs from an EFI variable the efivar_ssdt kernel command line +parameter can be used. The argument for the option is the variable name to +use. If there are multiple variables with the same name but with different +vendor GUIDs, all of them will be loaded. + +In order to store the AML code in an EFI variable the efivarfs filesystem can be +used. It is enabled and mounted by default in /sys/firmware/efi/efivars in all +recent distributions. + +Creating a new file in /sys/firmware/efi/efivars will automatically create a new +EFI variable. Updating a file in /sys/firmware/efi/efivars will update the EFI +variable. Please note that the file name needs to be specially formatted as +"Name-GUID" and that the first 4 bytes in the file (little-endian format) +represent the attributes of the EFI variable (see EFI_VARIABLE_MASK in +include/linux/efi.h). Writing to the file must also be done with one write +operation. + +For example, you can use the following bash script to create/update an EFI +variable with the content from a given file: + +#!/bin/sh -e

while ! [ -z "$1" ]; do
        case "$1" in
        "-f") filename="$2"; shift;;
        "-g") guid="$2"; shift;;
        *) name="$1";;
        esac
        shift
done

usage()
{
        echo "Syntax: ${0##*/} -f filename [ -g guid ] name"
        exit 1
}

[ -n "$name" -a -f "$filename" ] || usage

EFIVARFS="/sys/firmware/efi/efivars"

[ -d "$EFIVARFS" ] || exit 2

if stat -tf $EFIVARFS | grep -q -v de5e81e4; then
        mount -t efivarfs none $EFIVARFS
fi

# try to pick up an existing GUID
[ -n "$guid" ] || guid=$(find "$EFIVARFS" -name "$name-*" | head -n1 | cut -f2- -d-)

# use a randomly generated GUID
[ -n "$guid" ] || guid="$(cat /proc/sys/kernel/random/uuid)"

# efivarfs expects all of the data in one write
tmp=$(mktemp)
/bin/echo -ne "\007\000\000\000" | cat - $filename > $tmp
dd if=$tmp of="$EFIVARFS/$name-$guid" bs=$(stat -c %s $tmp)
rm $tmp

== Loading ACPI SSDTs from configfs ==

This option allows loading of user defined SSDTs from userspace via the configfs
interface. The CONFIG_ACPI_CONFIGFS option must be selected and configfs must be
mounted. In the following examples, we assume that configfs has been mounted in
/config.
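Returning briefly to the EFI-variable method: as a cross-check of the format described above (a 4-byte little-endian attribute word followed by the payload, submitted in a single write), here is a minimal C sketch equivalent to the shell script. The utility itself, its argument convention and the 64 KiB payload cap are illustrative assumptions:

    /* Single-write efivarfs update, mirroring the script above.
     * Hypothetical usage: ./efiwrite ssdt.aml /sys/firmware/efi/efivars/NAME-GUID */
    #include <fcntl.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            /* EFI_VARIABLE_NON_VOLATILE | BOOTSERVICE_ACCESS | RUNTIME_ACCESS,
             * i.e. the same 0x7 the script writes as "\007\000\000\000" */
            uint32_t attrs = 0x7;
            static uint8_t buf[4 + 65536];  /* arbitrary 64 KiB payload cap */
            ssize_t n;
            int in, out;

            if (argc != 3)
                    return 1;
            in = open(argv[1], O_RDONLY);
            if (in < 0)
                    return 1;
            memcpy(buf, &attrs, 4);         /* assumes a little-endian host */
            n = read(in, buf + 4, sizeof(buf) - 4);
            close(in);
            if (n <= 0)
                    return 1;
            out = open(argv[2], O_WRONLY | O_CREAT, 0644);
            if (out < 0)
                    return 1;
            /* efivarfs requires the attribute header and data in one write() */
            if (write(out, buf, 4 + n) != 4 + n)
                    return 1;
            return close(out) ? 1 : 0;
    }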
+ +New tables can be loaded by creating new directories in /config/acpi/table/ and +writing the SSDT aml code in the aml attribute: + +cd /config/acpi/table +mkdir my_ssdt +cat ~/ssdt.aml > my_ssdt/aml diff --git a/Documentation/arm64/acpi_object_usage.txt b/Documentation/arm64/acpi_object_usage.txt index a6e1a1805e51..c77010c5c1f0 100644 --- a/Documentation/arm64/acpi_object_usage.txt +++ b/Documentation/arm64/acpi_object_usage.txt @@ -13,14 +13,14 @@ For ACPI on arm64, tables also fall into the following categories: -- Required: DSDT, FADT, GTDT, MADT, MCFG, RSDP, SPCR, XSDT - -- Recommended: BERT, EINJ, ERST, HEST, SSDT + -- Recommended: BERT, EINJ, ERST, HEST, PCCT, SSDT - -- Optional: BGRT, CPEP, CSRT, DRTM, ECDT, FACS, FPDT, MCHI, MPST, - MSCT, RASF, SBST, SLIT, SPMI, SRAT, TCPA, TPM2, UEFI - - -- Not supported: BOOT, DBG2, DBGP, DMAR, ETDT, HPET, IBFT, IVRS, - LPIT, MSDM, RSDT, SLIC, WAET, WDAT, WDRT, WPBT + -- Optional: BGRT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT, IORT, + MCHI, MPST, MSCT, NFIT, PMTT, RASF, SBST, SLIT, SPMI, SRAT, STAO, + TCPA, TPM2, UEFI, XENV + -- Not supported: BOOT, DBGP, DMAR, ETDT, HPET, IBFT, IVRS, LPIT, + MSDM, OEMx, PSDT, RSDT, SLIC, WAET, WDAT, WDRT, WPBT Table Usage for ARMv8 Linux ----- ---------------------------------------------------------------- @@ -50,7 +50,8 @@ CSRT Signature Reserved (signature == "CSRT") DBG2 Signature Reserved (signature == "DBG2") == DeBuG port table 2 == - Microsoft only table, will not be supported. + License has changed and should be usable. Optional if used instead + of earlycon= on the command line. DBGP Signature Reserved (signature == "DBGP") == DeBuG Port table == @@ -133,10 +134,11 @@ GTDT Section 5.2.24 (signature == "GTDT") HEST Section 18.3.2 (signature == "HEST") == Hardware Error Source Table == - Until further error source types are defined, use only types 6 (AER - Root Port), 7 (AER Endpoint), 8 (AER Bridge), or 9 (Generic Hardware - Error Source). Firmware first error handling is possible if and only - if Trusted Firmware is being used on arm64. + ARM-specific error sources have been defined; please use those or the + PCI types such as type 6 (AER Root Port), 7 (AER Endpoint), or 8 (AER + Bridge), or use type 9 (Generic Hardware Error Source). Firmware first + error handling is possible if and only if Trusted Firmware is being + used on arm64. Must be supplied if RAS support is provided by the platform. It is recommended this table be supplied. @@ -149,20 +151,30 @@ IBFT Signature Reserved (signature == "IBFT") == iSCSI Boot Firmware Table == Microsoft defined table, support TBD. +IORT Signature Reserved (signature == "IORT") + == Input Output Remapping Table == + arm64 only table, required in order to describe IO topology, SMMUs, + and GIC ITSs, and how those various components are connected together, + such as identifying which components are behind which SMMUs/ITSs. + This table will only be required on certain SBSA platforms (e.g., + when using GICv3-ITS and an SMMU); on SBSA Level 0 platforms, it + remains optional. + IVRS Signature Reserved (signature == "IVRS") == I/O Virtualization Reporting Structure == x86_64 (AMD) only table, will not be supported. LPIT Signature Reserved (signature == "LPIT") == Low Power Idle Table == - x86 only table as of ACPI 5.1; future versions have been adapted for - use with ARM and will be recommended in order to support ACPI power - management.
x86 only table as of ACPI 5.1; starting with ACPI 6.0, processor + descriptions and power states on ARM platforms should use the DSDT + and define processor container devices (_HID ACPI0010, Section 8.4, + and more specifically 8.4.3 and 8.4.4). MADT Section 5.2.12 (signature == "APIC") == Multiple APIC Description Table == Required for arm64. Only the GIC interrupt controller structures - should be used (types 0xA - 0xE). + should be used (types 0xA - 0xF). MCFG Signature Reserved (signature == "MCFG") == Memory-mapped ConFiGuration space == @@ -176,14 +188,38 @@ MPST Section 5.2.21 (signature == "MPST") == Memory Power State Table == Optional, not currently supported. +MSCT Section 5.2.19 (signature == "MSCT") + == Maximum System Characteristic Table == + Optional, not currently supported. + MSDM Signature Reserved (signature == "MSDM") == Microsoft Data Management table == Microsoft only table, will not be supported. -MSCT Section 5.2.19 (signature == "MSCT") - == Maximum System Characteristic Table == +NFIT Section 5.2.25 (signature == "NFIT") + == NVDIMM Firmware Interface Table == + Optional, not currently supported. + +OEMx Signature of "OEMx" only + == OEM Specific Tables == + All tables starting with a signature of "OEM" are reserved for OEM + use. Since these are not meant to be of general use but are limited + to very specific end users, they are not recommended for use and are + not supported by the kernel for arm64. + +PCCT Section 14.1 (signature == "PCCT") + == Platform Communications Channel Table == + Recommended for use on arm64; use of PCC is recommended when using CPPC + to control performance and power for platform processors. + +PMTT Section 5.2.21.12 (signature == "PMTT") + == Platform Memory Topology Table == Optional, not currently supported. +PSDT Section 5.2.11.3 (signature == "PSDT") + == Persistent System Description Table == + Obsolete table, will not be supported. + RASF Section 5.2.20 (signature == "RASF") == RAS Feature table == Optional, not currently supported. @@ -195,7 +231,7 @@ RSDP Section 5.2.5 (signature == "RSD PTR") RSDT Section 5.2.7 (signature == "RSDT") == Root System Description Table == Since this table can only provide 32-bit addresses, it is deprecated - on arm64, and will not be used. + on arm64, and will not be used. If provided, it will be ignored. SBST Section 5.2.14 (signature == "SBST") == Smart Battery Subsystem Table == @@ -220,7 +256,7 @@ SPMI Signature Reserved (signature == "SPMI") SRAT Section 5.2.16 (signature == "SRAT") == System Resource Affinity Table == Optional, but if used, only the GICC Affinity structures are read. - To support NUMA, this table is required. + To support arm64 NUMA, this table is required. SSDT Section 5.2.11.2 (signature == "SSDT") == Secondary System Description Table == @@ -235,6 +271,11 @@ SSDT Section 5.2.11.2 (signature == "SSDT") These tables are optional, however. ACPI tables should contain only one DSDT but can contain many SSDTs. +STAO Signature Reserved (signature == "STAO") + == _STA Override table == + Optional, but only necessary in virtualized environments in order to + hide devices from guest OSs. + TCPA Signature Reserved (signature == "TCPA") == Trusted Computing Platform Alliance table == Optional, not currently supported, and may need changes to fully @@ -266,6 +307,10 @@ WPBT Signature Reserved (signature == "WPBT") == Windows Platform Binary Table == Microsoft only table, will not be supported.
+XENV Signature Reserved (signature == "XENV") + == Xen project table == + Optional, used only by Xen at present. + XSDT Section 5.2.8 (signature == "XSDT") == eXtended System Description Table == Required for arm64. @@ -273,44 +318,46 @@ XSDT Section 5.2.8 (signature == "XSDT") ACPI Objects ------------ -The expectations on individual ACPI objects are discussed in the list that -follows: +The expectations on individual ACPI objects that are likely to be used are +shown in the list that follows; any object not explicitly mentioned below +should be used as needed for a particular platform or particular subsystem, +such as power management or PCI. Name Section Usage for ARMv8 Linux ---- ------------ ------------------------------------------------- -_ADR 6.1.1 Use as needed. - -_BBN 6.5.5 Use as needed; PCI-specific. +_CCA 6.2.17 This method must be defined for all bus masters + on arm64 -- there are no assumptions made about + whether such devices are cache coherent or not. + The _CCA value is inherited by all descendants of + these devices so it does not need to be repeated. + Without _CCA on arm64, the kernel does not know what + to do about setting up DMA for the device. -_BDN 6.5.3 Optional; not likely to be used on arm64. + NB: this method provides default cache coherency + attributes; the presence of an SMMU can be used to + modify that, however. For example, a master could + default to non-coherent, but be made coherent with + the appropriate SMMU configuration (see Table 17 of + the IORT specification, ARM Document DEN 0049B). -_CCA 6.2.17 This method should be defined for all bus masters - on arm64. While cache coherency is assumed, making - it explicit ensures the kernel will set up DMA as - it should. +_CID 6.1.2 Use as needed, see also _HID. -_CDM 6.2.1 Optional, to be used only for processor devices. +_CLS 6.1.3 Use as needed, see also _HID. -_CID 6.1.2 Use as needed. - -_CLS 6.1.3 Use as needed. +_CPC 8.4.7.1 Use as needed, power management specific. CPPC is + recommended on arm64. _CRS 6.2.2 Required on arm64. -_DCK 6.5.2 Optional; not likely to be used on arm64. +_CSD 8.4.2.2 Use as needed, used only in conjunction with _CST. + +_CST 8.4.2.1 Low power idle states (8.4.4) are recommended instead + of C-states. _DDN 6.1.4 This field can be used for a device name. However, it is meant for DOS device names (e.g., COM1), so be careful of its use across OSes. -_DEP 6.5.8 Use as needed. - -_DIS 6.2.3 Optional, for power management use. - -_DLM 5.7.5 Optional. - -_DMA 6.2.4 Optional. - _DSD 6.2.5 To be used with caution. If this object is used, try to use it within the constraints already defined by the Device Properties UUID. Only in rare circumstances @@ -325,20 +372,10 @@ _DSD 6.2.5 To be used with caution. If this object is used, try with the UEFI Forum; this may cause some iteration as more than one OS will be registering entries. -_DSM Do not use this method. It is not standardized, the +_DSM 9.1.1 Do not use this method. It is not standardized, the return values are not well documented, and it is currently a frequent source of error. -_DSW 7.2.1 Use as needed; power management specific. - -_EDL 6.3.1 Optional. - -_EJD 6.3.2 Optional. - -_EJx 6.3.3 Optional. - -_FIX 6.2.7 x86 specific, not used on arm64. - \_GL 5.7.1 This object is not to be used in hardware reduced mode, and therefore should not be used on arm64. @@ -349,35 +386,22 @@ _GLK 6.5.7 This object requires a global lock be defined; there \_GPE 5.3.1 This namespace is for x86 use only. Do not use it on arm64. 
-_GSB 6.2.7 Optional. - -_HID 6.1.5 Use as needed. This is the primary object to use in - device probing, though _CID and _CLS may also be used. - -_HPP 6.2.8 Optional, PCI specific. - -_HPX 6.2.9 Optional, PCI specific. - -_HRV 6.1.6 Optional, use as needed to clarify device behavior; in - some cases, this may be easier to use than _DSD. +_HID 6.1.5 This is the primary object to use in device probing, + though _CID and _CLS may also be used. _INI 6.5.1 Not required, but can be useful in setting up devices when UEFI leaves them in a state that may not be what the driver expects before it starts probing. -_IRC 7.2.15 Use as needed; power management specific. - -_LCK 6.3.4 Optional. - -_MAT 6.2.10 Optional; see also the MADT. +_LPI 8.4.4.3 Recommended for use with processor definitions (_HID + ACPI0010) on arm64. See also _RDI. -_MLS 6.1.7 Optional, but highly recommended for use in - internationalization. +_MLS 6.1.7 Highly recommended for use in internationalization. -_OFF 7.1.2 It is recommended to define this method for any device +_OFF 7.2.2 It is recommended to define this method for any device that can be turned on or off. -_ON 7.1.3 It is recommended to define this method for any device +_ON 7.2.3 It is recommended to define this method for any device that can be turned on or off. \_OS 5.7.3 This method will return "Linux" by default (this is @@ -398,122 +422,107 @@ _OSC 6.2.11 This method can be a global method in ACPI (i.e., by the kernel community, then register it with the UEFI Forum. -\_OSI 5.7.2 Deprecated on ARM64. Any invocation of this method - will print a warning on the console and return false. - That is, as far as ACPI firmware is concerned, _OSI - cannot be used to determine what sort of system is - being used or what functionality is provided. The - _OSC method is to be used instead. - -_OST 6.3.5 Optional. +\_OSI 5.7.2 Deprecated on ARM64. As far as ACPI firmware is + concerned, _OSI is not to be used to determine what + sort of system is being used or what functionality + is provided. The _OSC method is to be used instead. _PDC 8.4.1 Deprecated, do not use on arm64. \_PIC 5.8.1 The method should not be used. On arm64, the only interrupt model available is GIC. -_PLD 6.1.8 Optional. - \_PR 5.3.1 This namespace is for x86 use only on legacy systems. Do not use it on arm64. -_PRS 6.2.12 Optional. - _PRT 6.2.13 Required as part of the definition of all PCI root devices. -_PRW 7.2.13 Use as needed; power management specific. - -_PRx 7.2.8-11 Use as needed; power management specific. If _PR0 is +_PRx 7.3.8-11 Use as needed; power management specific. If _PR0 is defined, _PR3 must also be defined. -_PSC 7.2.6 Use as needed; power management specific. - -_PSE 7.2.7 Use as needed; power management specific. - -_PSW 7.2.14 Use as needed; power management specific. - -_PSx 7.2.2-5 Use as needed; power management specific. If _PS0 is +_PSx 7.3.2-5 Use as needed; power management specific. If _PS0 is defined, _PS3 must also be defined. If clocks or regulators need adjusting to be consistent with power usage, change them in these methods. -\_PTS 7.3.1 Use as needed; power management specific. - -_PXM 6.2.14 Optional. - -_REG 6.5.4 Use as needed. +_RDI 8.4.4.4 Recommended for use with processor definitions (_HID + ACPI0010) on arm64. This should only be used in + conjunction with _LPI. \_REV 5.7.4 Always returns the latest version of ACPI supported. -_RMV 6.3.6 Optional. - \_SB 5.3.1 Required on arm64; all devices must be defined in this namespace. 
-_SEG 6.5.6 Use as needed; PCI-specific. - -\_SI 5.3.1, Optional. - 9.1 - -_SLI 6.2.15 Optional; recommended when SLIT table is in use. +_SLI 6.2.15 Use is recommended when SLIT table is in use. _STA 6.3.7, It is recommended to define this method for any device - 7.1.4 that can be turned on or off. + 7.2.4 that can be turned on or off. See also the STAO table + that provides overrides to hide devices in virtualized + environments. -_SRS 6.2.16 Optional; see also _PRS. +_SRS 6.2.16 Use as needed; see also _PRS. _STR 6.1.10 Recommended for conveying device names to end users; this is preferred over using _DDN. _SUB 6.1.9 Use as needed; _HID or _CID are preferred. -_SUN 6.1.11 Optional. - -\_Sx 7.3.2 Use as needed; power management specific. - -_SxD 7.2.16-19 Use as needed; power management specific. - -_SxW 7.2.20-24 Use as needed; power management specific. +_SUN 6.1.11 Use as needed, but recommended. -_SWS 7.3.3 Use as needed; power management specific; this may +_SWS 7.4.3 Use as needed; power management specific; this may require specification changes for use on arm64. -\_TTS 7.3.4 Use as needed; power management specific. - -\_TZ 5.3.1 Optional. - _UID 6.1.12 Recommended for distinguishing devices of the same class; define it if at all possible. -\_WAK 7.3.5 Use as needed; power management specific. + ACPI Event Model ---------------- Do not use GPE block devices; these are not supported in the hardware reduced profile used by arm64. Since there are no GPE blocks defined for use on ARM -platforms, GPIO-signaled interrupts should be used for creating system events. +platforms, ACPI events must be signaled differently. + +There are two options: GPIO-signaled interrupts (Section 5.6.5), and +interrupt-signaled events (Section 5.6.9). Interrupt-signaled events are a +new feature in the ACPI 6.1 specification. Either -- or both -- can be used +on a given platform, and which to use may be dependent of limitations in any +given SoC. If possible, interrupt-signaled events are recommended. ACPI Processor Control ---------------------- -Section 8 of the ACPI specification is currently undergoing change that -should be completed in the 6.0 version of the specification. Processor -performance control will be handled differently for arm64 at that point -in time. Processor aggregator devices (section 8.5) will not be used, -for example, but another similar mechanism instead. - -While UEFI constrains what we can say until the release of 6.0, it is -recommended that CPPC (8.4.5) be used as the primary model. This will -still be useful into the future. C-states and P-states will still be -provided, but most of the current design work appears to favor CPPC. +Section 8 of the ACPI specification changed significantly in version 6.0. +Processors should now be defined as Device objects with _HID ACPI0007; do +not use the deprecated Processor statement in ASL. All multiprocessor systems +should also define a hierarchy of processors, done with Processor Container +Devices (see Section 8.4.3.1, _HID ACPI0010); do not use processor aggregator +devices (Section 8.5) to describe processor topology. Section 8.4 of the +specification describes the semantics of these object definitions and how +they interrelate. + +Most importantly, the processor hierarchy defined also defines the low power +idle states that are available to the platform, along with the rules for +determining which processors can be turned on or off and the circumstances +that control that. 
Without this information, the processors will run in +whatever power state they were left in by UEFI. + +Note too, that the processor Device objects defined and the entries in the +MADT for GICs are expected to be in synchronization. The _UID of the Device +object must correspond to processor IDs used in the MADT. + +It is recommended that CPPC (8.4.5) be used as the primary model for processor +performance control on arm64. C-states and P-states may become available at +some point in the future, but most current design work appears to favor CPPC. Further, it is essential that the ARMv8 SoC provide a fully functional implementation of PSCI; this will be the only mechanism supported by ACPI -to control CPU power state (including secondary CPU booting). - -More details will be provided on the release of the ACPI 6.0 specification. +to control CPU power state. Booting of secondary CPUs using the ACPI +parking protocol is possible, but discouraged, since only PSCI is supported +for ARM servers. ACPI System Address Map Interfaces @@ -535,21 +544,25 @@ used to indicate fatal errors that cannot be corrected, and require immediate attention. Since there is no direct equivalent of the x86 SCI or NMI, arm64 handles -these slightly differently. The SCI is handled as a normal GPIO-signaled -interrupt; given that these are corrected (or correctable) errors being -reported, this is sufficient. The NMI is emulated as the highest priority -GPIO-signaled interrupt possible. This implies some caution must be used -since there could be interrupts at higher privilege levels or even interrupts -at the same priority as the emulated NMI. In Linux, this should not be the -case but one should be aware it could happen. +these slightly differently. The SCI is handled as a high priority interrupt; +given that these are corrected (or correctable) errors being reported, this +is sufficient. The NMI is emulated as the highest priority interrupt +possible. This implies some caution must be used since there could be +interrupts at higher privilege levels or even interrupts at the same priority +as the emulated NMI. In Linux, this should not be the case but one should +be aware it could happen. ACPI Objects Not Supported on ARM64 ----------------------------------- While this may change in the future, there are several classes of objects that can be defined, but are not currently of general interest to ARM servers. +Some of these objects have x86 equivalents, and may actually make sense in ARM +servers. However, there is either no hardware available at present, or there +may not even be a non-ARM implementation yet. Hence, they are not currently +supported. -These are not supported: +The following classes of objects are not supported: -- Section 9.2: ambient light sensor devices @@ -571,16 +584,6 @@ These are not supported: -- Section 9.18: time and alarm devices (see 9.15) - -ACPI Objects Not Yet Implemented --------------------------------- -While these objects have x86 equivalents, and they do make some sense in ARM -servers, there is either no hardware available at present, or in some cases -there may not yet be a non-ARM implementation. Hence, they are currently not -implemented though that may change in the future. 
- -Not yet implemented are: - -- Section 10: power source and power meter devices -- Section 11: thermal management @@ -589,5 +592,31 @@ Not yet implemented are: -- Section 13: SMBus interfaces - -- Section 17: NUMA support (prototypes have been submitted for - review) + +This also means that there is no support for the following objects: + +Name Section Name Section +---- ------------ ---- ------------ +_ALC 9.3.4 _FDM 9.10.3 +_ALI 9.3.2 _FIX 6.2.7 +_ALP 9.3.6 _GAI 10.4.5 +_ALR 9.3.5 _GHL 10.4.7 +_ALT 9.3.3 _GTM 9.9.2.1.1 +_BCT 10.2.2.10 _LID 9.5.1 +_BDN 6.5.3 _PAI 10.4.4 +_BIF 10.2.2.1 _PCL 10.3.2 +_BIX 10.2.2.1 _PIF 10.3.3 +_BLT 9.2.3 _PMC 10.4.1 +_BMA 10.2.2.4 _PMD 10.4.8 +_BMC 10.2.2.12 _PMM 10.4.3 +_BMD 10.2.2.11 _PRL 10.3.4 +_BMS 10.2.2.5 _PSR 10.3.1 +_BST 10.2.2.6 _PTP 10.4.2 +_BTH 10.2.2.7 _SBS 10.1.3 +_BTM 10.2.2.9 _SHL 10.4.6 +_BTP 10.2.2.8 _STM 9.9.2.1.1 +_DCK 6.5.2 _UPD 9.16.1 +_EC 12.12 _UPP 9.16.2 +_FDE 9.10.1 _WPC 10.5.2 +_FDI 9.10.2 _WPP 10.5.3 + diff --git a/Documentation/arm64/arm-acpi.txt b/Documentation/arm64/arm-acpi.txt index 570a4f8e1a01..1a74a041a443 100644 --- a/Documentation/arm64/arm-acpi.txt +++ b/Documentation/arm64/arm-acpi.txt @@ -34,7 +34,7 @@ of the summary text almost directly, to be honest. The short form of the rationale for ACPI on ARM is: --- ACPI’s bytecode (AML) allows the platform to encode hardware behavior, +-- ACPI’s byte code (AML) allows the platform to encode hardware behavior, while DT explicitly does not support this. For hardware vendors, being able to encode behavior is a key tool used in supporting operating system releases on new hardware. @@ -57,11 +57,11 @@ The short form of the rationale for ACPI on ARM is: -- The new ACPI governance process works well and Linux is now at the same table as hardware vendors and other OS vendors. In fact, there is no - longer any reason to feel that ACPI is only belongs to Windows or that + longer any reason to feel that ACPI only belongs to Windows or that Linux is in any way secondary to Microsoft in this arena. The move of ACPI governance into the UEFI forum has significantly opened up the specification development process, and currently, a large portion of the - changes being made to ACPI is being driven by Linux. + changes being made to ACPI are being driven by Linux. Key to the use of ACPI is the support model. For servers in general, the responsibility for hardware behaviour cannot solely be the domain of the @@ -110,7 +110,7 @@ ACPI support in drivers and subsystems for ARMv8 should never be mutually exclusive with DT support at compile time. At boot time the kernel will only use one description method depending on -parameters passed from the bootloader (including kernel bootargs). +parameters passed from the boot loader (including kernel bootargs). Regardless of whether DT or ACPI is used, the kernel must always be capable of booting with either scheme (in kernels with both schemes enabled at compile @@ -159,7 +159,7 @@ Further, the ACPI core will only use the 64-bit address fields in the FADT (Fixed ACPI Description Table). Any 32-bit address fields in the FADT will be ignored on arm64. -Hardware reduced mode (see Section 4.1 of the ACPI 5.1 specification) will +Hardware reduced mode (see Section 4.1 of the ACPI 6.1 specification) will be enforced by the ACPI core on arm64. Doing so allows the ACPI core to run less complex code since it no longer has to provide support for legacy hardware from other architectures. 
Any fields that are not to be used for @@ -167,7 +167,7 @@ hardware reduced mode must be set to zero. For the ACPI core to operate properly, and in turn provide the information the kernel needs to configure devices, it expects to find the following -tables (all section numbers refer to the ACPI 5.1 specfication): +tables (all section numbers refer to the ACPI 6.1 specification): -- RSDP (Root System Description Pointer), section 5.2.5 @@ -185,9 +185,23 @@ tables (all section numbers refer to the ACPI 5.1 specfication): -- If PCI is supported, the MCFG (Memory mapped ConFiGuration Table), section 5.2.6, specifically Table 5-31. + -- If booting without a console= kernel parameter is + supported, the SPCR (Serial Port Console Redirection table), + section 5.2.6, specifically Table 5-31. + + -- If necessary to describe the I/O topology, SMMUs and GIC ITSs, + the IORT (Input Output Remapping Table, section 5.2.6, specifically + Table 5-31). + + -- If NUMA is supported, the SRAT (System Resource Affinity Table) + and SLIT (System Locality distance Information Table), sections + 5.2.16 and 5.2.17, respectively. + If the above tables are not all present, the kernel may or may not be able to boot properly since it may not be able to configure all of the -devices available. +devices available. This list of tables is not meant to be all inclusive; +in some environments other tables may be needed (e.g., any of the APEI +tables from section 18) to support specific functionality. ACPI Detection @@ -198,7 +212,7 @@ the device structure. This is detailed further in the "Driver Recommendations" section. In non-driver code, if the presence of ACPI needs to be detected at -runtime, then check the value of acpi_disabled. If CONFIG_ACPI is not +run time, then check the value of acpi_disabled. If CONFIG_ACPI is not set, acpi_disabled will always be 1. @@ -233,7 +247,7 @@ that looks like this: Name(KEY0, "value0"). An ACPI device driver would then retrieve the value of the property by evaluating the KEY0 object. However, using Name() this way has multiple problems: (1) ACPI limits names ("KEY0") to four characters unlike DT; (2) there is no industry -wide registry that maintains a list of names, minimzing re-use; (3) +wide registry that maintains a list of names, minimizing re-use; (3) there is also no registry for the definition of property values ("value0"), again making re-use difficult; and (4) how does one maintain backward compatibility as new hardware comes out? The _DSD method was created @@ -434,7 +448,8 @@ The ACPI specification changes regularly. During the year 2014, for instance, version 5.1 was released and version 6.0 substantially completed, with most of the changes being driven by ARM-specific requirements. Proposed changes are presented and discussed in the ASWG (ACPI Specification Working Group) which -is a part of the UEFI Forum. +is a part of the UEFI Forum. The current version of the ACPI specification +is 6.1 release in January 2016. Participation in this group is open to all UEFI members. Please see http://www.uefi.org/workinggroup for details on group membership. @@ -443,7 +458,7 @@ It is the intent of the ARMv8 ACPI kernel code to follow the ACPI specification as closely as possible, and to only implement functionality that complies with the released standards from UEFI ASWG. As a practical matter, there will be vendors that provide bad ACPI tables or violate the standards in some way. 
-If this is because of errors, quirks and fixups may be necessary, but will +If this is because of errors, quirks and fix-ups may be necessary, but will be avoided if possible. If there are features missing from ACPI that preclude it from being used on a platform, ECRs (Engineering Change Requests) should be submitted to ASWG and go through the normal approval process; for those that @@ -480,8 +495,7 @@ References Software on ARM Platforms", dated 16 Aug 2014 [2] http://www.secretlab.ca/archives/151, 10 Jan 2015, Copyright (c) 2015, - Linaro Ltd., written by Grant Likely. A copy of the verbatim text (apart - from formatting) is also in Documentation/arm64/why_use_acpi.txt. + Linaro Ltd., written by Grant Likely. [3] AMD ACPI for Seattle platform documentation: http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Seattle_ACPI_Guide.pdf diff --git a/Documentation/bcache.txt b/Documentation/bcache.txt index 32b6c3189d98..a9259b562d5c 100644 --- a/Documentation/bcache.txt +++ b/Documentation/bcache.txt @@ -1,4 +1,4 @@ -Say you've got a big slow raid 6, and an X-25E or three. Wouldn't it be +Say you've got a big slow raid 6, and an ssd or three. Wouldn't it be nice if you could use them as cache... Hence bcache. Wiki and git repositories are at: @@ -8,7 +8,7 @@ Wiki and git repositories are at: It's designed around the performance characteristics of SSDs - it only allocates in erase block sized buckets, and it uses a hybrid btree/log to track cached -extants (which can be anywhere from a single sector to the bucket size). It's +extents (which can be anywhere from a single sector to the bucket size). It's designed to avoid random writes at all costs; it fills up an erase block sequentially, then issues a discard before reusing it. @@ -55,7 +55,10 @@ immediately. Without udev, you can manually register devices like this: Registering the backing device makes the bcache device show up in /dev; you can now format it and use it as normal. But the first time using a new bcache device, it'll be running in passthrough mode until you attach it to a cache. -See the section on attaching. +If you are thinking about using bcache later, it is recommended to setup all your +slow devices as bcache backing devices without a cache, and you can choose to add +a caching device later. +See 'ATTACHING' section below. The devices show up as: @@ -72,12 +75,14 @@ To get started: mount /dev/bcache0 /mnt You can control bcache devices through sysfs at /sys/block/bcache/bcache . +You can also control them through /sys/fs//bcache// . Cache devices are managed as sets; multiple caches per set isn't supported yet but will allow for mirroring of metadata and dirty data in the future. Your new cache set shows up as /sys/fs/bcache/ -ATTACHING: +ATTACHING +--------- After your cache device and backing device are registered, the backing device must be attached to your cache set to enable caching. Attaching a backing @@ -105,7 +110,8 @@ but all the cached data will be invalidated. If there was dirty data in the cache, don't expect the filesystem to be recoverable - you will have massive filesystem corruption, though ext4's fsck does work miracles. -ERROR HANDLING: +ERROR HANDLING +-------------- Bcache tries to transparently handle IO errors to/from the cache device without affecting normal operation; if it sees too many errors (the threshold is @@ -127,12 +133,181 @@ the backing devices to passthrough mode. writeback mode). It currently doesn't do anything intelligent if it fails to read some of the dirty data, though. 
-TROUBLESHOOTING PERFORMANCE:
+
+HOWTO/COOKBOOK
+--------------
+
+A) Starting a bcache with a missing caching device
+
+If registering the backing device fails because it is already registered,
+you just need to force it to run without the cache:
+ host:~# echo /dev/sdb1 > /sys/fs/bcache/register
+ [ 119.844831] bcache: register_bcache() error opening /dev/sdb1: device already registered
+
+Next, you try to register your caching device if it's present. However
+if it's absent, or registration fails for some reason, you can still
+start your bcache without its cache, like so:
+ host:/sys/block/sdb/sdb1/bcache# echo 1 > running
+
+Note that this may cause data loss if you were running in writeback mode.
+
+
+B) Bcache does not find its cache
+
+ host:/sys/block/md5/bcache# echo 0226553a-37cf-41d5-b3ce-8b1e944543a8 > attach
+ [ 1933.455082] bcache: bch_cached_dev_attach() Couldn't find uuid for md5 in set
+ [ 1933.478179] bcache: __cached_dev_store() Can't attach 0226553a-37cf-41d5-b3ce-8b1e944543a8
+ [ 1933.478179] : cache set not found
+
+In this case, the caching device was simply not registered at boot
+or disappeared and came back, and needs to be (re-)registered:
+ host:/sys/block/md5/bcache# echo /dev/sdh2 > /sys/fs/bcache/register
+
+
+C) Corrupt bcache crashes the kernel at device registration time:
+
+This should never happen. If it does happen, then you have found a bug!
+Please report it to the bcache development list: linux-bcache@vger.kernel.org
+
+Be sure to provide as much information as you can, including kernel dmesg
+output if available, so that we may assist.
+
+
+D) Recovering data without bcache:
+
+If bcache is not available in the kernel, a filesystem on the backing
+device is still available at an 8KiB offset. Access it either via a loopdev
+of the backing device created with --offset 8K, or at whatever value was
+given to --data-offset when you originally formatted bcache with `make-bcache`.
+
+For example:
+ losetup -o 8192 /dev/loop0 /dev/your_bcache_backing_dev
+
+This should present your unmodified backing device data in /dev/loop0.
+
+If your cache is in writethrough mode, then you can safely discard the
+cache device without losing data.
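(Editorial aside: the same 8KiB-offset loop device from item D can also be
set up programmatically. The sketch below uses the stock Linux loop ioctls
from <linux/loop.h>; the helper name and device paths are illustrative only,
and error handling is abbreviated.)

  /* attach_loop_at_8k(): hypothetical helper that attaches a loop
   * device to a bcache backing device at the 8KiB data offset,
   * equivalent to `losetup -o 8192 /dev/loopN <backing>`. */
  #include <fcntl.h>
  #include <linux/loop.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <unistd.h>

  int attach_loop_at_8k(const char *backing, const char *loopdev)
  {
          struct loop_info64 info;
          int bfd = open(backing, O_RDONLY);
          int lfd = open(loopdev, O_RDWR);

          if (bfd < 0 || lfd < 0)
                  return -1;
          if (ioctl(lfd, LOOP_SET_FD, bfd) < 0)
                  return -1;

          memset(&info, 0, sizeof(info));
          info.lo_offset = 8192;  /* skip the bcache superblock */
          if (ioctl(lfd, LOOP_SET_STATUS64, &info) < 0)
                  return -1;

          close(bfd);
          return lfd;             /* filesystem is now visible on loopdev */
  }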
+
+
+E) Wiping a cache device
+
+host:~# wipefs -a /dev/sdh2
+16 bytes were erased at offset 0x1018 (bcache)
+they were: c6 85 73 f6 4e 1a 45 ca 82 65 f5 7f 48 ba 6d 81
+
+After you boot back with bcache enabled, you recreate the cache and attach it:
+host:~# make-bcache -C /dev/sdh2
+UUID: 7be7e175-8f4c-4f99-94b2-9c904d227045
+Set UUID: 5bc072a8-ab17-446d-9744-e247949913c1
+version: 0
+nbuckets: 106874
+block_size: 1
+bucket_size: 1024
+nr_in_set: 1
+nr_this_dev: 0
+first_bucket: 1
+[ 650.511912] bcache: run_cache_set() invalidating existing data
+[ 650.549228] bcache: register_cache() registered cache device sdh2
+
+start backing device with missing cache:
+host:/sys/block/md5/bcache# echo 1 > running
+
+attach new cache:
+host:/sys/block/md5/bcache# echo 5bc072a8-ab17-446d-9744-e247949913c1 > attach
+[ 865.276616] bcache: bch_cached_dev_attach() Caching md5 as bcache0 on set 5bc072a8-ab17-446d-9744-e247949913c1
+
+
+F) Remove or replace a caching device
+
+ host:/sys/block/sda/sda7/bcache# echo 1 > detach
+ [ 695.872542] bcache: cached_dev_detach_finish() Caching disabled for sda7
+
+ host:~# wipefs -a /dev/nvme0n1p4
+ wipefs: error: /dev/nvme0n1p4: probing initialization failed: Device or resource busy
+ Ooops, it's disabled, but not unregistered, so it's still protected
+
+We need to go and unregister it:
+ host:/sys/fs/bcache/b7ba27a1-2398-4649-8ae3-0959f57ba128# ls -l cache0
+ lrwxrwxrwx 1 root root 0 Feb 25 18:33 cache0 -> ../../../devices/pci0000:00/0000:00:1d.0/0000:70:00.0/nvme/nvme0/nvme0n1/nvme0n1p4/bcache/
+ host:/sys/fs/bcache/b7ba27a1-2398-4649-8ae3-0959f57ba128# echo 1 > stop
+ kernel: [ 917.041908] bcache: cache_set_free() Cache set b7ba27a1-2398-4649-8ae3-0959f57ba128 unregistered
+
+Now we can wipe it:
+ host:~# wipefs -a /dev/nvme0n1p4
+ /dev/nvme0n1p4: 16 bytes were erased at offset 0x00001018 (bcache): c6 85 73 f6 4e 1a 45 ca 82 65 f5 7f 48 ba 6d 81
+
+
+G) dm-crypt and bcache
+
+First set up bcache unencrypted and then install dmcrypt on top of
+/dev/bcache. This will work faster than if you dmcrypt both the backing
+and caching devices and then install bcache on top. [benchmarks?]
+
+
+H) Stop/free a registered bcache to wipe and/or recreate it
+
+Suppose that you need to free up all bcache references so that you can
+run fdisk and re-register a changed partition table, which won't work
+if there are any active backing or caching devices left on it:
+
+1) Is it present in /dev/bcache* ? (there are times when it won't be)
+
+If so, it's easy:
+ host:/sys/block/bcache0/bcache# echo 1 > stop
+
+2) But if your backing device is gone, this won't work:
+ host:/sys/block/bcache0# cd bcache
+ bash: cd: bcache: No such file or directory
+
+In this case, you may have to unregister the dmcrypt block device that
+references this bcache to free it up:
+ host:~# dmsetup remove oldds1
+ bcache: bcache_device_free() bcache0 stopped
+ bcache: cache_set_free() Cache set 5bc072a8-ab17-446d-9744-e247949913c1 unregistered
+
+This causes the backing bcache to be removed from /sys/fs/bcache and
+then it can be reused. This would be true of any block device stacking
+where bcache is a lower device.
+ +3) In other cases, you can also look in /sys/fs/bcache/: + +host:/sys/fs/bcache# ls -l */{cache?,bdev?} +lrwxrwxrwx 1 root root 0 Mar 5 09:39 0226553a-37cf-41d5-b3ce-8b1e944543a8/bdev1 -> ../../../devices/virtual/block/dm-1/bcache/ +lrwxrwxrwx 1 root root 0 Mar 5 09:39 0226553a-37cf-41d5-b3ce-8b1e944543a8/cache0 -> ../../../devices/virtual/block/dm-4/bcache/ +lrwxrwxrwx 1 root root 0 Mar 5 09:39 5bc072a8-ab17-446d-9744-e247949913c1/cache0 -> ../../../devices/pci0000:00/0000:00:01.0/0000:01:00.0/ata10/host9/target9:0:0/9:0:0:0/block/sdl/sdl2/bcache/ + +The device names will show which UUID is relevant, cd in that directory +and stop the cache: + host:/sys/fs/bcache/5bc072a8-ab17-446d-9744-e247949913c1# echo 1 > stop + +This will free up bcache references and let you reuse the partition for +other purposes. + + + +TROUBLESHOOTING PERFORMANCE +--------------------------- Bcache has a bunch of config options and tunables. The defaults are intended to be reasonable for typical desktop and server workloads, but they're not what you want for getting the best possible numbers when benchmarking. + - Backing device alignment + + The default metadata size in bcache is 8k. If your backing device is + RAID based, then be sure to align this by a multiple of your stride + width using `make-bcache --data-offset`. If you intend to expand your + disk array in the future, then multiply a series of primes by your + raid stripe size to get the disk multiples that you would like. + + For example: If you have a 64k stripe size, then the following offset + would provide alignment for many common RAID5 data spindle counts: + 64k * 2*2*2*3*3*5*7 bytes = 161280k + + That space is wasted, but for only 157.5MB you can grow your RAID 5 + volume to the following data-spindle counts without re-aligning: + 3,4,5,6,7,8,9,10,12,14,15,18,20,21 ... + - Bad write performance If write performance is not what you expected, you probably wanted to be @@ -140,7 +315,7 @@ want for getting the best possible numbers when benchmarking. maturity, but simply because in writeback mode you'll lose data if something happens to your SSD) - # echo writeback > /sys/block/bcache0/cache_mode + # echo writeback > /sys/block/bcache0/bcache/cache_mode - Bad performance, or traffic not going to the SSD that you'd expect @@ -193,7 +368,9 @@ want for getting the best possible numbers when benchmarking. Solution: warm the cache by doing writes, or use the testing branch (there's a fix for the issue there). -SYSFS - BACKING DEVICE: + +SYSFS - BACKING DEVICE +---------------------- Available at /sys/block//bcache, /sys/block/bcache*/bcache and (if attached) /sys/fs/bcache//bdev* @@ -238,7 +415,7 @@ sequential_merge against all new requests to determine which new requests are sequential continuations of previous requests for the purpose of determining sequential cutoff. This is necessary if the sequential cutoff value is greater than the - maximum acceptable sequential size for any single request. + maximum acceptable sequential size for any single request. state The backing device can be in one of four different states: @@ -325,7 +502,7 @@ bucket_size Size of buckets cache<0..n> - Symlink to each of the cache devices comprising this cache set. + Symlink to each of the cache devices comprising this cache set. 
cache_available_percent Percentage of cache device which doesn't contain dirty data, and could diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index dce25d848d92..d515d58962b9 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt @@ -53,7 +53,7 @@ disk. logical_block_size (RO) ----------------------- -This is the logcal block size of the device, in bytes. +This is the logical block size of the device, in bytes. max_hw_sectors_kb (RO) ---------------------- diff --git a/Documentation/block/writeback_cache_control.txt b/Documentation/block/writeback_cache_control.txt index 59e0516cbf6b..8a6bdada5f6b 100644 --- a/Documentation/block/writeback_cache_control.txt +++ b/Documentation/block/writeback_cache_control.txt @@ -20,11 +20,11 @@ a forced cache flush, and the Force Unit Access (FUA) flag for requests. Explicit cache flushes ---------------------- -The REQ_FLUSH flag can be OR ed into the r/w flags of a bio submitted from +The REQ_PREFLUSH flag can be OR ed into the r/w flags of a bio submitted from the filesystem and will make sure the volatile cache of the storage device has been flushed before the actual I/O operation is started. This explicitly guarantees that previously completed write requests are on non-volatile -storage before the flagged bio starts. In addition the REQ_FLUSH flag can be +storage before the flagged bio starts. In addition the REQ_PREFLUSH flag can be set on an otherwise empty bio structure, which causes only an explicit cache flush without any dependent I/O. It is recommend to use the blkdev_issue_flush() helper for a pure cache flush. @@ -41,21 +41,21 @@ signaled after the data has been committed to non-volatile storage. Implementation details for filesystems -------------------------------------- -Filesystems can simply set the REQ_FLUSH and REQ_FUA bits and do not have to +Filesystems can simply set the REQ_PREFLUSH and REQ_FUA bits and do not have to worry if the underlying devices need any explicit cache flushing and how -the Forced Unit Access is implemented. The REQ_FLUSH and REQ_FUA flags +the Forced Unit Access is implemented. The REQ_PREFLUSH and REQ_FUA flags may both be set on a single bio. Implementation details for make_request_fn based block drivers -------------------------------------------------------------- -These drivers will always see the REQ_FLUSH and REQ_FUA bits as they sit +These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit directly below the submit_bio interface. For remapping drivers the REQ_FUA bits need to be propagated to underlying devices, and a global flush needs -to be implemented for bios with the REQ_FLUSH bit set. For real device -drivers that do not have a volatile cache the REQ_FLUSH and REQ_FUA bits -on non-empty bios can simply be ignored, and REQ_FLUSH requests without +to be implemented for bios with the REQ_PREFLUSH bit set. For real device +drivers that do not have a volatile cache the REQ_PREFLUSH and REQ_FUA bits +on non-empty bios can simply be ignored, and REQ_PREFLUSH requests without data can be completed successfully without doing any work. Drivers for devices with volatile caches need to implement the support for these flags themselves without any help from the block layer. 
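(Editorial aside: to make the filesystem-side usage above concrete, here is a
minimal, non-authoritative sketch of a journal-commit write that wants both
semantics, written against the v4.8-era bio API (bio_set_op_attrs() and the
one-argument submit_bio()). The function and parameter names are placeholders,
not kernel symbols, and flag/field names have changed across releases.)

  #include <linux/bio.h>
  #include <linux/blkdev.h>

  /* Sketch: write a commit block such that (a) all previously completed
   * writes are on stable media before it starts (REQ_PREFLUSH) and
   * (b) the block itself is stable before the bio completes (REQ_FUA). */
  static void write_commit_block_example(struct super_block *sb,
                                         struct page *page,
                                         sector_t sector,
                                         bio_end_io_t *done)
  {
          struct bio *bio = bio_alloc(GFP_NOFS, 1);

          bio->bi_bdev = sb->s_bdev;
          bio->bi_iter.bi_sector = sector;
          bio_add_page(bio, page, PAGE_SIZE, 0);
          bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA);
          bio->bi_end_io = done;
          submit_bio(bio);
  }

For a pure cache flush with no data, the blkdev_issue_flush() helper
mentioned above remains the recommended route.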
@@ -65,17 +65,17 @@ Implementation details for request_fn based block drivers -------------------------------------------------------------- For devices that do not support volatile write caches there is no driver -support required, the block layer completes empty REQ_FLUSH requests before -entering the driver and strips off the REQ_FLUSH and REQ_FUA bits from +support required, the block layer completes empty REQ_PREFLUSH requests before +entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from requests that have a payload. For devices with volatile write caches the driver needs to tell the block layer that it supports flushing caches by doing: blk_queue_write_cache(sdkp->disk->queue, true, false); -and handle empty REQ_FLUSH requests in its prep_fn/request_fn. Note that -REQ_FLUSH requests with a payload are automatically turned into a sequence -of an empty REQ_FLUSH request followed by the actual write by the block +and handle empty REQ_OP_FLUSH requests in its prep_fn/request_fn. Note that +REQ_PREFLUSH requests with a payload are automatically turned into a sequence +of an empty REQ_OP_FLUSH request followed by the actual write by the block layer. For devices that also support the FUA bit the block layer needs to be told to pass through the REQ_FUA bit using: @@ -83,4 +83,4 @@ to be told to pass through the REQ_FUA bit using: and the driver must handle write requests that have the REQ_FUA bit set in prep_fn/request_fn. If the FUA bit is not natively supported the block -layer turns it into an empty REQ_FLUSH request after the actual write. +layer turns it into an empty REQ_OP_FLUSH request after the actual write. diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 13100fb3c26d..0535ae1f73e5 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -59,23 +59,23 @@ num_devices parameter is optional and tells zram how many devices should be pre-created. Default: 1. 2) Set max number of compression streams - Regardless the value passed to this attribute, ZRAM will always - allocate multiple compression streams - one per online CPUs - thus - allowing several concurrent compression operations. The number of - allocated compression streams goes down when some of the CPUs - become offline. There is no single-compression-stream mode anymore, - unless you are running a UP system or has only 1 CPU online. - - To find out how many streams are currently available: +Regardless the value passed to this attribute, ZRAM will always +allocate multiple compression streams - one per online CPUs - thus +allowing several concurrent compression operations. The number of +allocated compression streams goes down when some of the CPUs +become offline. There is no single-compression-stream mode anymore, +unless you are running a UP system or has only 1 CPU online. + +To find out how many streams are currently available: cat /sys/block/zram0/max_comp_streams 3) Select compression algorithm - Using comp_algorithm device attribute one can see available and - currently selected (shown in square brackets) compression algorithms, - change selected compression algorithm (once the device is initialised - there is no way to change compression algorithm). +Using comp_algorithm device attribute one can see available and +currently selected (shown in square brackets) compression algorithms, +change selected compression algorithm (once the device is initialised +there is no way to change compression algorithm). 
- Examples:
+Examples:
 #show supported compression algorithms cat /sys/block/zram0/comp_algorithm lzo [lz4] @@ -83,17 +83,27 @@ pre-created. Default: 1. #select lzo compression algorithm echo lzo > /sys/block/zram0/comp_algorithm
+For the time being, the `comp_algorithm' content does not necessarily
+show every compression algorithm supported by the kernel. We keep this
+list primarily to simplify device configuration; one can configure
+a new device with a compression algorithm that is not listed in
+`comp_algorithm'. Internally, ZRAM uses the Crypto API
+and, if some of the algorithms were built as modules, it's impossible
+to list all of them using, for instance, /proc/crypto or any other
+method. This, however, has the advantage of permitting the use of
+custom crypto compression modules (implementing S/W or H/W compression).
+
 4) Set Disksize
- Set disk size by writing the value to sysfs node 'disksize'.
- The value can be either in bytes or you can use mem suffixes.
- Examples:
- # Initialize /dev/zram0 with 50MB disksize
- echo $((50*1024*1024)) > /sys/block/zram0/disksize
+Set disk size by writing the value to sysfs node 'disksize'.
+The value can be either in bytes or you can use mem suffixes.
+Examples:
+ # Initialize /dev/zram0 with 50MB disksize
+ echo $((50*1024*1024)) > /sys/block/zram0/disksize
- # Using mem suffixes
- echo 256K > /sys/block/zram0/disksize
- echo 512M > /sys/block/zram0/disksize
- echo 1G > /sys/block/zram0/disksize
+ # Using mem suffixes
+ echo 256K > /sys/block/zram0/disksize
+ echo 512M > /sys/block/zram0/disksize
+ echo 1G > /sys/block/zram0/disksize
 Note: There is little point creating a zram of greater than twice the size of memory @@ -101,20 +111,20 @@ since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the size of the disk when not in use so a huge zram is wasteful.
 5) Set memory limit: Optional
- Set memory limit by writing the value to sysfs node 'mem_limit'.
- The value can be either in bytes or you can use mem suffixes.
- In addition, you could change the value in runtime.
- Examples:
- # limit /dev/zram0 with 50MB memory
- echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
-
- # Using mem suffixes
- echo 256K > /sys/block/zram0/mem_limit
- echo 512M > /sys/block/zram0/mem_limit
- echo 1G > /sys/block/zram0/mem_limit
-
- # To disable memory limit
- echo 0 > /sys/block/zram0/mem_limit
+Set memory limit by writing the value to sysfs node 'mem_limit'.
+The value can be either in bytes or you can use mem suffixes.
+In addition, you could change the value at runtime.
+Examples:
+ # limit /dev/zram0 with 50MB memory
+ echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
+
+ # Using mem suffixes
+ echo 256K > /sys/block/zram0/mem_limit
+ echo 512M > /sys/block/zram0/mem_limit
+ echo 1G > /sys/block/zram0/mem_limit
+
+ # To disable memory limit
+ echo 0 > /sys/block/zram0/mem_limit
 6) Activate: mkswap /dev/zram0
diff --git a/Documentation/cec.txt b/Documentation/cec.txt new file mode 100644 index 000000000000..75155fe37153 --- /dev/null +++ b/Documentation/cec.txt @@ -0,0 +1,267 @@
+CEC Kernel Support
+==================
+
+The CEC framework provides a unified kernel interface for use with HDMI CEC
+hardware. It is designed to handle multiple types of hardware (receivers,
+transmitters, USB dongles). The framework also gives the option to decide
+what to do in the kernel driver and what should be handled by userspace
+applications.
In addition it integrates the remote control passthrough +feature into the kernel's remote control framework. + + +The CEC Protocol +---------------- + +The CEC protocol enables consumer electronic devices to communicate with each +other through the HDMI connection. The protocol uses logical addresses in the +communication. The logical address is strictly connected with the functionality +provided by the device. The TV acting as the communication hub is always +assigned address 0. The physical address is determined by the physical +connection between devices. + +The CEC framework described here is up to date with the CEC 2.0 specification. +It is documented in the HDMI 1.4 specification with the new 2.0 bits documented +in the HDMI 2.0 specification. But for most of the features the freely available +HDMI 1.3a specification is sufficient: + +http://www.microprocessor.org/HDMISpecification13a.pdf + + +The Kernel Interface +==================== + +CEC Adapter +----------- + +The struct cec_adapter represents the CEC adapter hardware. It is created by +calling cec_allocate_adapter() and deleted by calling cec_delete_adapter(): + +struct cec_adapter *cec_allocate_adapter(const struct cec_adap_ops *ops, + void *priv, const char *name, u32 caps, u8 available_las, + struct device *parent); +void cec_delete_adapter(struct cec_adapter *adap); + +To create an adapter you need to pass the following information: + +ops: adapter operations which are called by the CEC framework and that you +have to implement. + +priv: will be stored in adap->priv and can be used by the adapter ops. + +name: the name of the CEC adapter. Note: this name will be copied. + +caps: capabilities of the CEC adapter. These capabilities determine the + capabilities of the hardware and which parts are to be handled + by userspace and which parts are handled by kernelspace. The + capabilities are returned by CEC_ADAP_G_CAPS. + +available_las: the number of simultaneous logical addresses that this + adapter can handle. Must be 1 <= available_las <= CEC_MAX_LOG_ADDRS. + +parent: the parent device. + + +To register the /dev/cecX device node and the remote control device (if +CEC_CAP_RC is set) you call: + +int cec_register_adapter(struct cec_adapter *adap); + +To unregister the devices call: + +void cec_unregister_adapter(struct cec_adapter *adap); + +Note: if cec_register_adapter() fails, then call cec_delete_adapter() to +clean up. But if cec_register_adapter() succeeded, then only call +cec_unregister_adapter() to clean up, never cec_delete_adapter(). The +unregister function will delete the adapter automatically once the last user +of that /dev/cecX device has closed its file handle. + + +Implementing the Low-Level CEC Adapter +-------------------------------------- + +The following low-level adapter operations have to be implemented in +your driver: + +struct cec_adap_ops { + /* Low-level callbacks */ + int (*adap_enable)(struct cec_adapter *adap, bool enable); + int (*adap_monitor_all_enable)(struct cec_adapter *adap, bool enable); + int (*adap_log_addr)(struct cec_adapter *adap, u8 logical_addr); + int (*adap_transmit)(struct cec_adapter *adap, u8 attempts, + u32 signal_free_time, struct cec_msg *msg); + void (*adap_log_status)(struct cec_adapter *adap); + + /* High-level callbacks */ + ... 
+};
+
+The five low-level ops deal with various aspects of controlling the CEC adapter
+hardware:
+
+
+To enable/disable the hardware:
+
+ int (*adap_enable)(struct cec_adapter *adap, bool enable);
+
+This callback enables or disables the CEC hardware. Enabling the CEC hardware
+means powering it up in a state where no logical addresses are claimed. This
+op assumes that the physical address (adap->phys_addr) is valid when enable is
+true and will not change while the CEC adapter remains enabled. The initial
+state of the CEC adapter after calling cec_allocate_adapter() is disabled.
+
+Note that adap_enable must return 0 if enable is false.
+
+
+To enable/disable the 'monitor all' mode:
+
+ int (*adap_monitor_all_enable)(struct cec_adapter *adap, bool enable);
+
+If enabled, then the adapter should be put in a mode to also monitor messages
+that are not for us. Not all hardware supports this and this function is only
+called if the CEC_CAP_MONITOR_ALL capability is set. This callback is optional
+(some hardware may always be in 'monitor all' mode).
+
+Note that adap_monitor_all_enable must return 0 if enable is false.
+
+
+To program a new logical address:
+
+ int (*adap_log_addr)(struct cec_adapter *adap, u8 logical_addr);
+
+If logical_addr == CEC_LOG_ADDR_INVALID then all programmed logical addresses
+are to be erased. Otherwise the given logical address should be programmed.
+If the maximum number of available logical addresses is exceeded, then it
+should return -ENXIO. Once a logical address is programmed the CEC hardware
+can receive directed messages to that address.
+
+Note that adap_log_addr must return 0 if logical_addr is CEC_LOG_ADDR_INVALID.
+
+
+To transmit a new message:
+
+ int (*adap_transmit)(struct cec_adapter *adap, u8 attempts,
+ u32 signal_free_time, struct cec_msg *msg);
+
+This transmits a new message. The attempts argument is the suggested number of
+attempts for the transmit.
+
+The signal_free_time is the number of data bit periods that the adapter should
+wait when the line is free before attempting to send a message. This value
+depends on whether this transmit is a retry, a message from a new initiator or
+a new message for the same initiator. Most hardware will handle this
+automatically, but in some cases this information is needed.
+
+The CEC_FREE_TIME_TO_USEC macro can be used to convert signal_free_time to
+microseconds (one data bit period is 2.4 ms).
+
+
+To log the current CEC hardware status:
+
+ void (*adap_status)(struct cec_adapter *adap, struct seq_file *file);
+
+This optional callback can be used to show the status of the CEC hardware.
+The status is available through debugfs: cat /sys/kernel/debug/cec/cecX/status
+
+
+Your adapter driver will also have to react to events (typically interrupt
+driven) by calling into the framework in the following situations:
+
+When a transmit finished (successfully or otherwise):
+
+void cec_transmit_done(struct cec_adapter *adap, u8 status, u8 arb_lost_cnt,
+ u8 nack_cnt, u8 low_drive_cnt, u8 error_cnt);
+
+The status can be one of:
+
+CEC_TX_STATUS_OK: the transmit was successful.
+CEC_TX_STATUS_ARB_LOST: arbitration was lost: another CEC initiator
+took control of the CEC line and you lost the arbitration.
+CEC_TX_STATUS_NACK: the message was nacked (for a directed message) or
+acked (for a broadcast message). A retransmission is needed.
+CEC_TX_STATUS_LOW_DRIVE: low drive was detected on the CEC bus. This
+indicates that a follower detected an error on the bus and requested a
+retransmission.
+CEC_TX_STATUS_ERROR: some unspecified error occurred: this can be one of
+the previous two if the hardware cannot differentiate or something else
+entirely.
+CEC_TX_STATUS_MAX_RETRIES: could not transmit the message after
+trying multiple times. Should only be set by the driver if it has hardware
+support for retrying messages. If set, then the framework assumes that it
+doesn't have to make another attempt to transmit the message since the
+hardware did that already.
+
+The *_cnt arguments are the number of error conditions that were seen.
+This may be 0 if no information is available. Drivers that do not support
+hardware retry can just set the counter corresponding to the transmit error
+to 1. If the hardware does support retry, then either set these counters to
+0 if the hardware provides no feedback of which errors occurred and how many
+times, or fill in the correct values as reported by the hardware.
+
+When a CEC message was received:
+
+void cec_received_msg(struct cec_adapter *adap, struct cec_msg *msg);
+
+Speaks for itself.
+
+Implementing the High-Level CEC Adapter
+---------------------------------------
+
+The low-level operations drive the hardware, the high-level operations are
+CEC protocol driven. The following high-level callbacks are available:
+
+struct cec_adap_ops {
+ /* Low-level callbacks */
+ ...
+
+ /* High-level CEC message callback */
+ int (*received)(struct cec_adapter *adap, struct cec_msg *msg);
+};
+
+The received() callback allows the driver to optionally handle a newly
+received CEC message:
+
+ int (*received)(struct cec_adapter *adap, struct cec_msg *msg);
+
+If the driver wants to process a CEC message, then it can implement this
+callback. If it doesn't want to handle this message, then it should return
+-ENOMSG, otherwise the CEC framework assumes it processed this message and
+it will not do anything with it.
+
+
+CEC framework functions
+-----------------------
+
+CEC Adapter drivers can call the following CEC framework functions:
+
+int cec_transmit_msg(struct cec_adapter *adap, struct cec_msg *msg,
+ bool block);
+
+Transmit a CEC message. If block is true, then wait until the message has been
+transmitted, otherwise just queue it and return.
+
+void cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block);
+
+Change the physical address. This function will set adap->phys_addr and
+send an event if it has changed. If cec_s_log_addrs() has been called and
+the physical address has become valid, then the CEC framework will start
+claiming the logical addresses. If block is true, then this function won't
+return until this process has finished.
+
+When the physical address is set to a valid value the CEC adapter will
+be enabled (see the adap_enable op). When it is set to CEC_PHYS_ADDR_INVALID,
+then the CEC adapter will be disabled. If you change a valid physical address
+to another valid physical address, then this function will first set the
+address to CEC_PHYS_ADDR_INVALID before enabling the new physical address.
+
+int cec_s_log_addrs(struct cec_adapter *adap,
+ struct cec_log_addrs *log_addrs, bool block);
+
+Claim the CEC logical addresses. Should never be called if CEC_CAP_LOG_ADDRS
+is set. If block is true, then wait until the logical addresses have been
+claimed, otherwise just queue it and return. To unconfigure all logical
+addresses call this function with log_addrs set to NULL or with
+log_addrs->num_log_addrs set to 0. The block argument is ignored when
+unconfiguring.
This function will just return if the physical address is +invalid. Once the physical address becomes valid, then the framework will +attempt to claim these logical addresses. diff --git a/Documentation/conf.py b/Documentation/conf.py new file mode 100644 index 000000000000..792b6338ef19 --- /dev/null +++ b/Documentation/conf.py @@ -0,0 +1,414 @@ +# -*- coding: utf-8 -*- +# +# The Linux Kernel documentation build configuration file, created by +# sphinx-quickstart on Fri Feb 12 13:51:46 2016. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.insert(0, os.path.abspath('sphinx')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['kernel-doc', 'rstFlatTable'] + +# Gracefully handle missing rst2pdf. +try: + import rst2pdf + extensions += ['rst2pdf.pdfbuilder'] +except ImportError: + pass + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'The Linux Kernel' +copyright = '2016, The kernel development community' +author = 'The kernel development community' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# In a normal build, version and release are are set to KERNELVERSION and +# KERNELRELEASE, respectively, from the Makefile via Sphinx command line +# arguments. +# +# The following code tries to extract the information by reading the Makefile, +# when Sphinx is run directly (e.g. by Read the Docs). +try: + makefile_version = None + makefile_patchlevel = None + for line in open('../Makefile'): + key, val = [x.strip() for x in line.split('=', 2)] + if key == 'VERSION': + makefile_version = val + elif key == 'PATCHLEVEL': + makefile_patchlevel = val + if makefile_version and makefile_patchlevel: + break +except: + pass +finally: + if makefile_version and makefile_patchlevel: + version = release = makefile_version + '.' + makefile_patchlevel + else: + sys.stderr.write('Warning: Could not extract kernel version\n') + version = release = "unknown version" + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. 
+language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['output'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + +primary_domain = 'C' +highlight_language = 'C' + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. + +# The Read the Docs theme is available from +# - https://github.com/snide/sphinx_rtd_theme +# - https://pypi.python.org/pypi/sphinx_rtd_theme +# - python-sphinx-rtd-theme package (on Debian) +try: + import sphinx_rtd_theme + html_theme = 'sphinx_rtd_theme' + html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +except ImportError: + sys.stderr.write('Warning: The Sphinx \'sphinx_rtd_theme\' HTML theme was not found. Make sure you have the theme installed to produce pretty HTML output. Falling back to the default theme.\n') + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +#html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. 
+#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' +#html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# Now only 'ja' uses this config value +#html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +#html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'TheLinuxKerneldoc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', + +# Latex figure (float) alignment +#'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'TheLinuxKernel.tex', 'The Linux Kernel Documentation', + 'The kernel development community', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). 
+man_pages = [ + (master_doc, 'thelinuxkernel', 'The Linux Kernel Documentation', + [author], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'TheLinuxKernel', 'The Linux Kernel Documentation', + author, 'TheLinuxKernel', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False + + +# -- Options for Epub output ---------------------------------------------- + +# Bibliographic Dublin Core info. +epub_title = project +epub_author = author +epub_publisher = author +epub_copyright = copyright + +# The basename for the epub file. It defaults to the project name. +#epub_basename = project + +# The HTML theme for the epub output. Since the default themes are not +# optimized for small screen space, using the same theme for HTML and epub +# output is usually not wise. This defaults to 'epub', a theme designed to save +# visual space. +#epub_theme = 'epub' + +# The language of the text. It defaults to the language option +# or 'en' if the language is not set. +#epub_language = '' + +# The scheme of the identifier. Typical schemes are ISBN or URL. +#epub_scheme = '' + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +#epub_identifier = '' + +# A unique identification for the text. +#epub_uid = '' + +# A tuple containing the cover image and cover page html template filenames. +#epub_cover = () + +# A sequence of (type, uri, title) tuples for the guide element of content.opf. +#epub_guide = () + +# HTML files that should be inserted before the pages created by sphinx. +# The format is a list of tuples containing the path and title. +#epub_pre_files = [] + +# HTML files that should be inserted after the pages created by sphinx. +# The format is a list of tuples containing the path and title. +#epub_post_files = [] + +# A list of files that should not be packed into the epub file. +epub_exclude_files = ['search.html'] + +# The depth of the table of contents in toc.ncx. +#epub_tocdepth = 3 + +# Allow duplicate toc entries. +#epub_tocdup = True + +# Choose between 'default' and 'includehidden'. +#epub_tocscope = 'default' + +# Fix unsupported image types using the Pillow. +#epub_fix_images = False + +# Scale large images. +#epub_max_image_width = 0 + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#epub_show_urls = 'inline' + +# If false, no index is generated. +#epub_use_index = True + +#======= +# rst2pdf +# +# Grouping the document tree into PDF files. List of tuples +# (source start file, target name, title, author, options). +# +# See the Sphinx chapter of http://ralsina.me/static/manual.pdf +# +# FIXME: Do not add the index file here; the result will be too big. Adding +# multiple PDF files here actually tries to get the cross-referencing right +# *between* PDF files. +pdf_documents = [ + ('index', u'Kernel', u'Kernel', u'J. 
Random Bozo'), +] + +# kernel-doc extension configuration for running Sphinx directly (e.g. by Read +# the Docs). In a normal build, these are supplied from the Makefile via command +# line arguments. +kerneldoc_bin = '../scripts/kernel-doc' +kerneldoc_srctree = '..' diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt index ba78e7c2a069..4bc7287806de 100644 --- a/Documentation/cpu-freq/core.txt +++ b/Documentation/cpu-freq/core.txt @@ -96,7 +96,7 @@ new - new frequency For details about OPP, see Documentation/power/opp.txt dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with - cpufreq_frequency_table_cpuinfo which is provided with the list of + cpufreq_table_validate_and_show() which is provided with the list of frequencies that are available for operation. This function provides a ready to use conversion routine to translate the OPP layer's internal information about the available frequencies into a format readily @@ -110,7 +110,7 @@ dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with /* Do things */ r = dev_pm_opp_init_cpufreq_table(dev, &freq_table); if (!r) - cpufreq_frequency_table_cpuinfo(policy, freq_table); + cpufreq_table_validate_and_show(policy, freq_table); /* Do other things */ } diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index 14f4e6336d88..772b94fde264 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt @@ -231,7 +231,7 @@ if you want to skip one entry in the table, set the frequency to CPUFREQ_ENTRY_INVALID. The entries don't need to be in ascending order. -By calling cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, +By calling cpufreq_table_validate_and_show(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); the cpuinfo.min_freq and cpuinfo.max_freq values are detected, and policy->min and policy->max are set to the same values. This is @@ -244,14 +244,12 @@ policy->max, and all other criteria are met. This is helpful for the ->verify call. int cpufreq_frequency_table_target(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table, unsigned int target_freq, - unsigned int relation, - unsigned int *index); + unsigned int relation); is the corresponding frequency table helper for the ->target -stage. Just pass the values to this function, and the unsigned int -index returns the number of the frequency table entry which contains +stage. Just pass the values to this function, and this function +returns the number of the frequency table entry which contains the frequency the CPU shall be set to. The following macros can be used as iterators over cpufreq_frequency_table: diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt index 0a94224ad296..9e3c3b33514c 100644 --- a/Documentation/cpu-freq/pcc-cpufreq.txt +++ b/Documentation/cpu-freq/pcc-cpufreq.txt @@ -159,8 +159,8 @@ to be strictly associated with a P-state. 2.2 cpuinfo_transition_latency: ------------------------------- -The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification -does not include a field to expose this value currently. +The cpuinfo_transition_latency field is 0. The PCC specification does +not include a field to expose this value currently. 
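(Editorial aside: tying together the cpufreq table helpers discussed above, a
hypothetical driver ->init() path might look like the following sketch. The
foo_ name and the overall body are illustrative, not taken from any in-tree
driver.)

  #include <linux/cpu.h>
  #include <linux/cpufreq.h>
  #include <linux/pm_opp.h>

  static int foo_cpufreq_init(struct cpufreq_policy *policy)
  {
          struct device *dev = get_cpu_device(policy->cpu);
          struct cpufreq_frequency_table *freq_table;
          int ret;

          /* Build a cpufreq table from the OPP library's list of
           * operating points for this CPU. */
          ret = dev_pm_opp_init_cpufreq_table(dev, &freq_table);
          if (ret)
                  return ret;

          /* Validate the table and fill in cpuinfo.min_freq,
           * cpuinfo.max_freq, policy->min and policy->max. */
          ret = cpufreq_table_validate_and_show(policy, freq_table);
          if (ret)
                  dev_pm_opp_free_cpufreq_table(dev, &freq_table);
          return ret;
  }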
2.3 cpuinfo_cur_freq: --------------------- diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index 12b1b25b4da9..f722f227a73b 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt @@ -20,48 +20,70 @@ to /proc/cpuinfo output of some architectures: identifier (rather than the kernel's). The actual value is architecture and platform dependent.
-4) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
+4) /sys/devices/system/cpu/cpuX/topology/drawer_id:
+
+ the drawer ID of cpuX. Typically it is the hardware platform's
+ identifier (rather than the kernel's). The actual value is
+ architecture and platform dependent.
+
+5) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
 internal kernel map of cpuX's hardware threads within the same core as cpuX.
-5) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list:
+6) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list:
 human-readable list of cpuX's hardware threads within the same core as cpuX.
-6) /sys/devices/system/cpu/cpuX/topology/core_siblings:
+7) /sys/devices/system/cpu/cpuX/topology/core_siblings:
 internal kernel map of cpuX's hardware threads within the same physical_package_id.
-7) /sys/devices/system/cpu/cpuX/topology/core_siblings_list:
+8) /sys/devices/system/cpu/cpuX/topology/core_siblings_list:
 human-readable list of cpuX's hardware threads within the same physical_package_id.
-8) /sys/devices/system/cpu/cpuX/topology/book_siblings:
+9) /sys/devices/system/cpu/cpuX/topology/book_siblings:
 internal kernel map of cpuX's hardware threads within the same book_id.
-9) /sys/devices/system/cpu/cpuX/topology/book_siblings_list:
+10) /sys/devices/system/cpu/cpuX/topology/book_siblings_list:
 human-readable list of cpuX's hardware threads within the same book_id.
+11) /sys/devices/system/cpu/cpuX/topology/drawer_siblings:
+
+ internal kernel map of cpuX's hardware threads within the same
+ drawer_id.
+
+12) /sys/devices/system/cpu/cpuX/topology/drawer_siblings_list:
+
+ human-readable list of cpuX's hardware threads within the same
+ drawer_id.
+
 To implement it in an architecture-neutral way, a new source file,
-drivers/base/topology.c, is to export the 6 or 9 attributes. The three book
-related sysfs files will only be created if CONFIG_SCHED_BOOK is selected.
+drivers/base/topology.c, is to export the 6 to 12 attributes. The book
+and drawer related sysfs files will only be created if CONFIG_SCHED_BOOK
+and CONFIG_SCHED_DRAWER are selected.
+
+CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are currently only used on s390,
+where they reflect the cpu and cache hierarchy.
 For an architecture to support this feature, it must define some of these macros in include/asm-XXX/topology.h:
 #define topology_physical_package_id(cpu)
 #define topology_core_id(cpu)
 #define topology_book_id(cpu)
+#define topology_drawer_id(cpu)
 #define topology_sibling_cpumask(cpu)
 #define topology_core_cpumask(cpu)
 #define topology_book_cpumask(cpu)
+#define topology_drawer_cpumask(cpu)
 The type of **_id macros is int. The type of **_cpumask macros is (const) struct cpumask *. The latter @@ -78,6 +100,8 @@ not defined by include/asm-XXX/topology.h: For architectures that don't support books (CONFIG_SCHED_BOOK) there are no default definitions for topology_book_id() and topology_book_cpumask().
+For architectures that don't support drawers (CONFIG_SCHED_DRAWER) there are
+no default definitions for topology_drawer_id() and topology_drawer_cpumask().
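(Editorial aside: as an illustration of the macro hooks listed above, an
architecture's topology.h might map them onto per-cpu data along these lines.
This is a sketch only; the cpu_topology structure and its field names are
assumptions modeled on existing ports, not requirements of the interface.)

  /* Hypothetical include/asm-xxx/topology.h fragment. */
  #include <linux/cpumask.h>

  struct cpu_topology {
          int       core_id;
          int       socket_id;
          cpumask_t thread_sibling;
          cpumask_t core_sibling;
  };
  extern struct cpu_topology cpu_topology[NR_CPUS];

  #define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
  #define topology_core_id(cpu)             (cpu_topology[cpu].core_id)
  #define topology_sibling_cpumask(cpu)     (&cpu_topology[cpu].thread_sibling)
  #define topology_core_cpumask(cpu)        (&cpu_topology[cpu].core_sibling)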
Additionally, CPU topology information is provided under /sys/devices/system/cpu and includes these files. The internal diff --git a/Documentation/crypto/asymmetric-keys.txt b/Documentation/crypto/asymmetric-keys.txt index 8c07e0ea6bc0..2b7816dea370 100644 --- a/Documentation/crypto/asymmetric-keys.txt +++ b/Documentation/crypto/asymmetric-keys.txt @@ -76,7 +76,7 @@ the criterion string: Looking in /proc/keys, the last 8 hex digits of the key fingerprint are displayed, along with the subtype:
- 1a39e171 I----- 1 perm 3f010000 0 0 asymmetri modsign.0: DSA 5acc2142 []
+ 1a39e171 I----- 1 perm 3f010000 0 0 asymmetric modsign.0: DSA 5acc2142 []
 ========================= diff --git a/Documentation/development-process/4.Coding b/Documentation/development-process/4.Coding index e3cb6a56653a..9a3ee77cefb1 100644 --- a/Documentation/development-process/4.Coding +++ b/Documentation/development-process/4.Coding @@ -346,7 +346,7 @@ which have not been so documented, there is no harm in adding kerneldoc comments for the future; indeed, this can be a useful activity for beginning kernel developers. The format of these comments, along with some information on how to create kerneldoc templates can be found in the file
-Documentation/kernel-doc-nano-HOWTO.txt.
+Documentation/kernel-documentation.rst.
 Anybody who reads through a significant amount of existing kernel code will note that, often, comments are most notable by their absence. Once again, diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index df2d636b6088..e5b6497116f4 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -14,8 +14,12 @@ The target is named "raid" and it accepts the following parameters: <#raid_devs> [.. ] :
+ raid0 RAID0 striping (no resilience)
 raid1 RAID1 mirroring
- raid4 RAID4 dedicated parity disk
+ raid4 RAID4 with dedicated last parity disk
+ raid5_n RAID5 with dedicated last parity disk supporting takeover
+ Same as raid4
+ -Transitory layout
 raid5_la RAID5 left asymmetric - rotating parity 0 with data continuation raid5_ra RAID5 right asymmetric @@ -30,7 +34,19 @@ The target is named "raid" and it accepts the following parameters: - rotating parity N (right-to-left) with data restart raid6_nc RAID6 N continue - rotating parity N (right-to-left) with data continuation
+ raid6_n_6 RAID6 with dedicated parity disks
+ - parity and Q-syndrome on the last 2 disks;
+ layout for takeover from/to raid4/raid5_n
+ raid6_la_6 Same as "raid5_la" plus dedicated last Q-syndrome disk
+ - layout for takeover from raid5_la from/to raid6
+ raid6_ra_6 Same as "raid5_ra" plus dedicated last Q-syndrome disk
+ - layout for takeover from raid5_ra from/to raid6
+ raid6_ls_6 Same as "raid5_ls" plus dedicated last Q-syndrome disk
+ - layout for takeover from raid5_ls from/to raid6
+ raid6_rs_6 Same as "raid5_rs" plus dedicated last Q-syndrome disk
+ - layout for takeover from raid5_rs from/to raid6
 raid10 Various RAID10 inspired algorithms chosen by additional params
+ (see raid10_format and raid10_copies below)
 - RAID10: Striped Mirrors (aka 'Striping on top of mirrors') - RAID1E: Integrated Adjacent Stripe Mirroring - RAID1E: Integrated Offset Stripe Mirroring @@ -116,10 +132,41 @@ The target is named "raid" and it accepts the following parameters: Here we see layouts closely akin to 'RAID1E - Integrated Offset Stripe Mirroring'.
+ [delta_disks <N>] + The delta_disks option value (-251 < N < +251) triggers + device removal (negative value) or device addition (positive + value) on any of the reshape-supporting raid levels 4/5/6 and 10. + RAID levels 4/5/6 allow for addition of devices (metadata + and data device tuple), raid10_near and raid10_offset only + allow for device addition. raid10_far does not support any + reshaping at all. + A minimum number of devices has to be kept to enforce resilience, + which is 3 devices for raid4/5 and 4 devices for raid6. + + [data_offset <sectors>] + This option value defines the offset into each data device + where the data starts. This is used to provide out-of-place + reshaping space to avoid writing over data whilst + changing the layout of stripes, hence an interruption/crash + may happen at any time without the risk of losing data. + E.g. when adding devices to an existing raid set during + forward reshaping, the out-of-place space will be allocated + at the beginning of each raid device. The kernel raid4/5/6/10 + MD personalities supporting such device addition will read the data from + the existing first stripes (those with smaller number of stripes) + starting at data_offset to fill up a new stripe with the larger + number of stripes, calculate the redundancy blocks (CRC/Q-syndrome) + and write that new stripe to offset 0. The same will be applied to all + N-1 other new stripes. This out-of-place scheme is used to change + the RAID type (i.e. the allocation algorithm) as well, e.g. + changing from raid5_ls to raid5_n. + <#raid_devs>: The number of devices composing the array. Each device consists of two entries. The first is the device containing the metadata (if any); the second is the one containing the - data. + data. A maximum of 64 metadata/data device entries is supported + up to target version 1.8.0. + 1.9.0 supports up to 253, which is enforced by the used MD kernel runtime. If a drive has failed or is missing at creation time, a '-' can be given for both the metadata and data drives for a given position. @@ -207,7 +254,6 @@ include: "recover"- Initiate/continue a recover process. "check" - Initiate a check (i.e. a "scrub") of the array. "repair" - Initiate a repair of the array. - "reshape"- Currently unsupported (-EINVAL). Discard Support @@ -257,3 +303,9 @@ Version History 1.5.2 'mismatch_cnt' is zero unless [last_]sync_action is "check". 1.6.0 Add discard support (and devices_handle_discard_safely module param). 1.7.0 Add support for MD RAID0 mappings. +1.8.0 Explicitly check for compatible flags in the superblock metadata + and refuse to start the raid set if any are set by a newer + target version, thus avoiding data corruption on a raid set + with a reshape in progress. +1.9.0 Add support for RAID level takeover/reshape/region size + and set size reduction. diff --git a/Documentation/device-mapper/log-writes.txt b/Documentation/device-mapper/log-writes.txt index c10f30c9b534..f4ebcbaf50f3 100644 --- a/Documentation/device-mapper/log-writes.txt +++ b/Documentation/device-mapper/log-writes.txt @@ -14,14 +14,14 @@ Log Ordering We log things in order of completion once we are sure the write is no longer in cache. This means that normal WRITE requests are not actually logged until the -next REQ_FLUSH request. This is to make it easier for userspace to replay the -log in a way that correlates to what is on disk and not what is in cache, to -make it easier to detect improper waiting/flushing. +next REQ_PREFLUSH request.
This is to make it easier for userspace to replay +the log in a way that correlates to what is on disk and not what is in cache, +to make it easier to detect improper waiting/flushing. This works by attaching all WRITE requests to a list once the write completes. -Once we see a REQ_FLUSH request we splice this list onto the request and once +Once we see a REQ_PREFLUSH request we splice this list onto the request and once the FLUSH request completes we log all of the WRITEs and then the FLUSH. Only -completed WRITEs, at the time the REQ_FLUSH is issued, are added in order to +completed WRITEs, at the time the REQ_PREFLUSH is issued, are added in order to simulate the worst case scenario with regard to power failures. Consider the following example (W means write, C means complete): diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt index 5a6b16070a33..b545856a444f 100644 --- a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt +++ b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt @@ -61,7 +61,9 @@ Required Properties: - #address-cells: must be 1 - #size-cells: must be 1 - interrupts : Should be single bit error interrupt, then double bit error - interrupt. Note the rising edge type. + interrupt. +- interrupt-controller : boolean indicator that ECC Manager is an interrupt controller +- #interrupt-cells : must be set to 2. - ranges : standard definition, should translate from local addresses Subcomponents: @@ -70,11 +72,23 @@ L2 Cache ECC Required Properties: - compatible : Should be "altr,socfpga-a10-l2-ecc" - reg : Address and size for ECC error interrupt clear registers. +- interrupts : Should be single bit error interrupt, then double bit error + interrupt, in this order. On-Chip RAM ECC Required Properties: - compatible : Should be "altr,socfpga-a10-ocram-ecc" - reg : Address and size for ECC block registers. +- interrupts : Should be single bit error interrupt, then double bit error + interrupt, in this order. + +Ethernet FIFO ECC +Required Properties: +- compatible : Should be "altr,socfpga-eth-mac-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent Ethernet node. +- interrupts : Should be single bit error interrupt, then double bit error + interrupt, in this order. 
Example: @@ -85,15 +99,37 @@ Example: #size-cells = <1>; interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>, <0 0 IRQ_TYPE_LEVEL_HIGH>; + interrupt-controller; + #interrupt-cells = <2>; ranges; l2-ecc@ffd06010 { compatible = "altr,socfpga-a10-l2-ecc"; reg = <0xffd06010 0x4>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>, + <32 IRQ_TYPE_LEVEL_HIGH>; }; ocram-ecc@ff8c3000 { compatible = "altr,socfpga-a10-ocram-ecc"; reg = <0xff8c3000 0x90>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH>, + <33 IRQ_TYPE_LEVEL_HIGH> ; + }; + + emac0-rx-ecc@ff8c0800 { + compatible = "altr,socfpga-eth-mac-ecc"; + reg = <0xff8c0800 0x400>; + altr,ecc-parent = <&gmac0>; + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>, + <36 IRQ_TYPE_LEVEL_HIGH>; + }; + + emac0-tx-ecc@ff8c0c00 { + compatible = "altr,socfpga-eth-mac-ecc"; + reg = <0xff8c0c00 0x400>; + altr,ecc-parent = <&gmac0>; + interrupts = <5 IRQ_TYPE_LEVEL_HIGH>, + <37 IRQ_TYPE_LEVEL_HIGH>; }; }; diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 74d5417d0410..61c8b4620415 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -39,7 +39,9 @@ Optional properties: When using a PPI, specifies a list of phandles to CPU nodes corresponding to the set of CPUs which have a PMU of this type signalling the PPI listed in the - interrupts property. + interrupts property, unless this is already specified + by the PPI interrupt specifier itself (in which case + the interrupt-affinity property shouldn't be present). This property should be present when there is more than a single SPI. diff --git a/Documentation/devicetree/bindings/arm/xen.txt b/Documentation/devicetree/bindings/arm/xen.txt index 0f7b9c2109f8..c9b9321434ea 100644 --- a/Documentation/devicetree/bindings/arm/xen.txt +++ b/Documentation/devicetree/bindings/arm/xen.txt @@ -11,10 +11,32 @@ the following properties: memory where the grant table should be mapped to, using an HYPERVISOR_memory_op hypercall. The memory region is large enough to map the whole grant table (it is larger or equal to gnttab_max_grant_frames()). + This property is unnecessary when booting Dom0 using ACPI. - interrupts: the interrupt used by Xen to inject event notifications. A GIC node is also required. + This property is unnecessary when booting Dom0 using ACPI. +To support UEFI on Xen ARM virtual platforms, Xen populates the FDT "uefi" node +under /hypervisor with following parameters: + +________________________________________________________________________________ +Name | Size | Description +================================================================================ +xen,uefi-system-table | 64-bit | Guest physical address of the UEFI System + | | Table. +-------------------------------------------------------------------------------- +xen,uefi-mmap-start | 64-bit | Guest physical address of the UEFI memory + | | map. +-------------------------------------------------------------------------------- +xen,uefi-mmap-size | 32-bit | Size in bytes of the UEFI memory map + | | pointed to in previous entry. +-------------------------------------------------------------------------------- +xen,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI + | | memory map. +-------------------------------------------------------------------------------- +xen,uefi-mmap-desc-ver | 32-bit | Version of the mmap descriptor format. 
+-------------------------------------------------------------------------------- Example (assuming #address-cells = <2> and #size-cells = <2>): @@ -22,4 +44,17 @@ hypervisor { compatible = "xen,xen-4.3", "xen,xen"; reg = <0 0xb0000000 0 0x20000>; interrupts = <1 15 0xf08>; + uefi { + xen,uefi-system-table = <0xXXXXXXXX>; + xen,uefi-mmap-start = <0xXXXXXXXX>; + xen,uefi-mmap-size = <0xXXXXXXXX>; + xen,uefi-mmap-desc-size = <0xXXXXXXXX>; + xen,uefi-mmap-desc-ver = <0xXXXXXXXX>; + }; }; + +The format and meaning of the "xen,uefi-*" parameters are similar to those in +Documentation/arm/uefi.txt, which are provided by the regular UEFI stub. However +they differ because they are provided by the Xen hypervisor, together with a set +of UEFI runtime services implemented via hypercalls, see +http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,platform.h.html. diff --git a/Documentation/devicetree/bindings/ata/brcm,sata-brcmstb.txt b/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt similarity index 81% rename from Documentation/devicetree/bindings/ata/brcm,sata-brcmstb.txt rename to Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt index 60872838f1ad..0a5b3b47f217 100644 --- a/Documentation/devicetree/bindings/ata/brcm,sata-brcmstb.txt +++ b/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt @@ -1,13 +1,14 @@ -* Broadcom SATA3 AHCI Controller for STB +* Broadcom SATA3 AHCI Controller SATA nodes are defined to describe on-chip Serial ATA controllers. Each SATA controller should have its own node. Required properties: - compatible : should be one or more of - "brcm,bcm7425-ahci" - "brcm,bcm7445-ahci" - "brcm,sata3-ahci" + "brcm,bcm7425-ahci" + "brcm,bcm7445-ahci" + "brcm,bcm-nsp-ahci" + "brcm,sata3-ahci" - reg : register mappings for AHCI and SATA_TOP_CTRL - reg-names : "ahci" and "top-ctrl" - interrupts : interrupt mapping for SATA IRQ diff --git a/Documentation/devicetree/bindings/extcon/extcon-arizona.txt b/Documentation/devicetree/bindings/extcon/extcon-arizona.txt index e27341f8a4c7..7f3d94ae81ff 100644 --- a/Documentation/devicetree/bindings/extcon/extcon-arizona.txt +++ b/Documentation/devicetree/bindings/extcon/extcon-arizona.txt @@ -46,7 +46,8 @@ Optional properties: The second cell represents the MICBIAS to be used. The third cell represents the value of the micd-pol-gpio pin. - - wlf,gpsw : Settings for the general purpose switch + - wlf,gpsw : Settings for the general purpose switch, set as one of the + ARIZONA_GPSW_XXX defines. Example: diff --git a/Documentation/devicetree/bindings/gpio/cirrus,clps711x-mctrl-gpio.txt b/Documentation/devicetree/bindings/gpio/cirrus,clps711x-mctrl-gpio.txt index 94ae9f82dcf8..fd42e7280f72 100644 --- a/Documentation/devicetree/bindings/gpio/cirrus,clps711x-mctrl-gpio.txt +++ b/Documentation/devicetree/bindings/gpio/cirrus,clps711x-mctrl-gpio.txt @@ -1,7 +1,7 @@ * ARM Cirrus Logic CLPS711X SYSFLG1 MCTRL GPIOs Required properties: -- compatible: Should contain "cirrus,clps711x-mctrl-gpio". +- compatible: Should contain "cirrus,ep7209-mctrl-gpio". - gpio-controller: Marks the device node as a gpio controller. - #gpio-cells: Should be two. 
The first cell is the pin number and the second cell is used to specify the gpio polarity: @@ -11,7 +11,7 @@ Required properties: Example: sysgpio: sysgpio { compatible = "cirrus,ep7312-mctrl-gpio", - "cirrus,clps711x-mctrl-gpio"; + "cirrus,ep7209-mctrl-gpio"; gpio-controller; #gpio-cells = <2>; }; diff --git a/Documentation/devicetree/bindings/gpio/gpio-clps711x.txt b/Documentation/devicetree/bindings/gpio/gpio-clps711x.txt index e0d0446a6b78..0a304ad29d81 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-clps711x.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-clps711x.txt @@ -1,7 +1,7 @@ Cirrus Logic CLPS711X GPIO controller Required properties: -- compatible: Should be "cirrus,clps711x-gpio" +- compatible: Should be "cirrus,ep7209-gpio" - reg: Physical base GPIO controller registers location and length. There should be two registers, first is DATA register, the second is DIRECTION. @@ -21,7 +21,7 @@ aliases { }; porta: gpio@80000000 { - compatible = "cirrus,clps711x-gpio"; + compatible = "cirrus,ep7312-gpio","cirrus,ep7209-gpio"; reg = <0x80000000 0x1>, <0x80000040 0x1>; gpio-controller; #gpio-cells = <2>; diff --git a/Documentation/devicetree/bindings/gpio/gpio-max77620.txt b/Documentation/devicetree/bindings/gpio/gpio-max77620.txt new file mode 100644 index 000000000000..410e716fd3d2 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio-max77620.txt @@ -0,0 +1,25 @@ +GPIO driver for MAX77620 Power management IC from Maxim Semiconductor. + +The device has 8 GPIO pins which can be configured as GPIOs as well as for +special IO functions. + +Required properties: +------------------- +- gpio-controller : Marks the device node as a gpio controller. +- #gpio-cells : Should be two. The first cell is the pin number and + the second cell is used to specify the gpio polarity: + 0 = active high + 1 = active low +For more details, please refer to the generic GPIO DT binding document +<devicetree/bindings/gpio/gpio.txt>. + +Example: +-------- +#include <dt-bindings/mfd/max77620.h> +... +max77620@3c { + compatible = "maxim,max77620"; + + gpio-controller; + #gpio-cells = <2>; +}; diff --git a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt index 6b4a98f74be3..08dd15f89ba9 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt @@ -21,6 +21,7 @@ Required properties: maxim,max7313 maxim,max7315 ti,pca6107 + ti,pca9536 ti,tca6408 ti,tca6416 ti,tca6424 diff --git a/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt b/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt index f60e2f477e93..8da26b35b5c3 100644 --- a/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt +++ b/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt @@ -7,6 +7,7 @@ Required Properties: - "renesas,gpio-r8a7779": for R8A7779 (R-Car H1) compatible GPIO controller. - "renesas,gpio-r8a7790": for R8A7790 (R-Car H2) compatible GPIO controller. - "renesas,gpio-r8a7791": for R8A7791 (R-Car M2-W) compatible GPIO controller. + - "renesas,gpio-r8a7792": for R8A7792 (R-Car V2H) compatible GPIO controller. - "renesas,gpio-r8a7793": for R8A7793 (R-Car M2-N) compatible GPIO controller. - "renesas,gpio-r8a7794": for R8A7794 (R-Car E2) compatible GPIO controller. - "renesas,gpio-r8a7795": for R8A7795 (R-Car H3) compatible GPIO controller.
diff --git a/Documentation/devicetree/bindings/hwmon/apm-xgene-hwmon.txt b/Documentation/devicetree/bindings/hwmon/apm-xgene-hwmon.txt new file mode 100644 index 000000000000..59b38557f1bb --- /dev/null +++ b/Documentation/devicetree/bindings/hwmon/apm-xgene-hwmon.txt @@ -0,0 +1,14 @@ +APM X-Gene hwmon driver + +APM X-Gene SOC sensors are accessed over the "SLIMpro" mailbox. + +Required properties : + - compatible : should be "apm,xgene-slimpro-hwmon" + - mboxes : use the label reference for the mailbox as the first parameter. + The second parameter is the channel number. + +Example : + hwmonslimpro { + compatible = "apm,xgene-slimpro-hwmon"; + mboxes = <&mailbox 7>; + }; diff --git a/Documentation/devicetree/bindings/hwmon/jc42.txt b/Documentation/devicetree/bindings/hwmon/jc42.txt new file mode 100644 index 000000000000..07a250498fbb --- /dev/null +++ b/Documentation/devicetree/bindings/hwmon/jc42.txt @@ -0,0 +1,42 @@ +Properties for Jedec JC-42.4 compatible temperature sensors + +Required properties: +- compatible: May include a device-specific string consisting of the + manufacturer and the name of the chip. A list of supported + chip names follows. + Must include "jedec,jc-42.4-temp" for any Jedec JC-42.4 + compatible temperature sensor. + + Supported chip names: + adi,adt7408 + atmel,at30ts00 + atmel,at30tse004 + onnn,cat6095 + onnn,cat34ts02 + maxim,max6604 + microchip,mcp9804 + microchip,mcp9805 + microchip,mcp9808 + microchip,mcp98243 + microchip,mcp98244 + microchip,mcp9843 + nxp,se97 + nxp,se98 + st,stts2002 + st,stts2004 + st,stts3000 + st,stts424 + st,stts424e + idt,tse2002 + idt,tse2004 + idt,ts3000 + idt,ts3001 + +- reg: I2C address + +Example: + +temp-sensor@1a { + compatible = "jedec,jc-42.4-temp"; + reg = <0x1a>; +}; diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt index 539874490492..acc5cd64711c 100644 --- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt +++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt @@ -56,6 +56,70 @@ maxim,ds1050 5 Bit Programmable, Pulse-Width Modulator maxim,max1237 Low-Power, 4-/12-Channel, 2-Wire Serial, 12-Bit ADCs maxim,max6625 9-Bit/12-Bit Temperature Sensors with I²C-Compatible Serial Interface mc,rv3029c2 Real Time Clock Module with I2C-Bus +microchip,mcp4531-502 Microchip 7-bit Single I2C Digital Potentiometer (5k) +microchip,mcp4531-103 Microchip 7-bit Single I2C Digital Potentiometer (10k) +microchip,mcp4531-503 Microchip 7-bit Single I2C Digital Potentiometer (50k) +microchip,mcp4531-104 Microchip 7-bit Single I2C Digital Potentiometer (100k) +microchip,mcp4532-502 Microchip 7-bit Single I2C Digital Potentiometer (5k) +microchip,mcp4532-103 Microchip 7-bit Single I2C Digital Potentiometer (10k) +microchip,mcp4532-503 Microchip 7-bit Single I2C Digital Potentiometer (50k) +microchip,mcp4532-104 Microchip 7-bit Single I2C Digital Potentiometer (100k) +microchip,mcp4541-502 Microchip 7-bit Single I2C Digital Potentiometer with NV Memory (5k) +microchip,mcp4541-103 Microchip 7-bit Single I2C Digital Potentiometer with NV Memory (10k) +microchip,mcp4541-503 Microchip 7-bit Single I2C Digital Potentiometer with NV Memory (50k) +microchip,mcp4541-104 Microchip 7-bit Single I2C Digital Potentiometer with NV Memory (100k) +microchip,mcp4542-502 Microchip 7-bit Single I2C Digital Potentiometer with NV Memory (5k) +microchip,mcp4542-103 Microchip 7-bit Single I2C Digital Potentiometer with NV Memory (10k) +microchip,mcp4542-503 Microchip 
7-bit Single I2C Digital Potentiometer with NV Memory (50k) +microchip,mcp4542-104 Microchip 7-bit Single I2C Digital Potentiometer with NV Memory (100k) +microchip,mcp4551-502 Microchip 8-bit Single I2C Digital Potentiometer (5k) +microchip,mcp4551-103 Microchip 8-bit Single I2C Digital Potentiometer (10k) +microchip,mcp4551-503 Microchip 8-bit Single I2C Digital Potentiometer (50k) +microchip,mcp4551-104 Microchip 8-bit Single I2C Digital Potentiometer (100k) +microchip,mcp4552-502 Microchip 8-bit Single I2C Digital Potentiometer (5k) +microchip,mcp4552-103 Microchip 8-bit Single I2C Digital Potentiometer (10k) +microchip,mcp4552-503 Microchip 8-bit Single I2C Digital Potentiometer (50k) +microchip,mcp4552-104 Microchip 8-bit Single I2C Digital Potentiometer (100k) +microchip,mcp4561-502 Microchip 8-bit Single I2C Digital Potentiometer with NV Memory (5k) +microchip,mcp4561-103 Microchip 8-bit Single I2C Digital Potentiometer with NV Memory (10k) +microchip,mcp4561-503 Microchip 8-bit Single I2C Digital Potentiometer with NV Memory (50k) +microchip,mcp4561-104 Microchip 8-bit Single I2C Digital Potentiometer with NV Memory (100k) +microchip,mcp4562-502 Microchip 8-bit Single I2C Digital Potentiometer with NV Memory (5k) +microchip,mcp4562-103 Microchip 8-bit Single I2C Digital Potentiometer with NV Memory (10k) +microchip,mcp4562-503 Microchip 8-bit Single I2C Digital Potentiometer with NV Memory (50k) +microchip,mcp4562-104 Microchip 8-bit Single I2C Digital Potentiometer with NV Memory (100k) +microchip,mcp4631-502 Microchip 7-bit Dual I2C Digital Potentiometer (5k) +microchip,mcp4631-103 Microchip 7-bit Dual I2C Digital Potentiometer (10k) +microchip,mcp4631-503 Microchip 7-bit Dual I2C Digital Potentiometer (50k) +microchip,mcp4631-104 Microchip 7-bit Dual I2C Digital Potentiometer (100k) +microchip,mcp4632-502 Microchip 7-bit Dual I2C Digital Potentiometer (5k) +microchip,mcp4632-103 Microchip 7-bit Dual I2C Digital Potentiometer (10k) +microchip,mcp4632-503 Microchip 7-bit Dual I2C Digital Potentiometer (50k) +microchip,mcp4632-104 Microchip 7-bit Dual I2C Digital Potentiometer (100k) +microchip,mcp4641-502 Microchip 7-bit Dual I2C Digital Potentiometer with NV Memory (5k) +microchip,mcp4641-103 Microchip 7-bit Dual I2C Digital Potentiometer with NV Memory (10k) +microchip,mcp4641-503 Microchip 7-bit Dual I2C Digital Potentiometer with NV Memory (50k) +microchip,mcp4641-104 Microchip 7-bit Dual I2C Digital Potentiometer with NV Memory (100k) +microchip,mcp4642-502 Microchip 7-bit Dual I2C Digital Potentiometer with NV Memory (5k) +microchip,mcp4642-103 Microchip 7-bit Dual I2C Digital Potentiometer with NV Memory (10k) +microchip,mcp4642-503 Microchip 7-bit Dual I2C Digital Potentiometer with NV Memory (50k) +microchip,mcp4642-104 Microchip 7-bit Dual I2C Digital Potentiometer with NV Memory (100k) +microchip,mcp4651-502 Microchip 8-bit Dual I2C Digital Potentiometer (5k) +microchip,mcp4651-103 Microchip 8-bit Dual I2C Digital Potentiometer (10k) +microchip,mcp4651-503 Microchip 8-bit Dual I2C Digital Potentiometer (50k) +microchip,mcp4651-104 Microchip 8-bit Dual I2C Digital Potentiometer (100k) +microchip,mcp4652-502 Microchip 8-bit Dual I2C Digital Potentiometer (5k) +microchip,mcp4652-103 Microchip 8-bit Dual I2C Digital Potentiometer (10k) +microchip,mcp4652-503 Microchip 8-bit Dual I2C Digital Potentiometer (50k) +microchip,mcp4652-104 Microchip 8-bit Dual I2C Digital Potentiometer (100k) +microchip,mcp4661-502 Microchip 8-bit Dual I2C Digital Potentiometer with NV Memory 
(5k) +microchip,mcp4661-103 Microchip 8-bit Dual I2C Digital Potentiometer with NV Memory (10k) +microchip,mcp4661-503 Microchip 8-bit Dual I2C Digital Potentiometer with NV Memory (50k) +microchip,mcp4661-104 Microchip 8-bit Dual I2C Digital Potentiometer with NV Memory (100k) +microchip,mcp4662-502 Microchip 8-bit Dual I2C Digital Potentiometer with NV Memory (5k) +microchip,mcp4662-103 Microchip 8-bit Dual I2C Digital Potentiometer with NV Memory (10k) +microchip,mcp4662-503 Microchip 8-bit Dual I2C Digital Potentiometer with NV Memory (50k) +microchip,mcp4662-104 Microchip 8-bit Dual I2C Digital Potentiometer with NV Memory (100k) national,lm63 Temperature sensor with integrated fan control national,lm75 I2C TEMP SENSOR national,lm80 Serial Interface ACPI-Compatible Microprocessor System Hardware Monitor diff --git a/Documentation/devicetree/bindings/iio/adc/brcm,iproc-static-adc.txt b/Documentation/devicetree/bindings/iio/adc/brcm,iproc-static-adc.txt new file mode 100644 index 000000000000..caaaed765ce4 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/brcm,iproc-static-adc.txt @@ -0,0 +1,41 @@ +* Broadcom's IPROC Static ADC controller + +The Broadcom iProc ADC controller has 8 channels of 10-bit ADC. +It allows the user to convert analog input voltage values to digital. + +Required properties: + +- compatible: Must be "brcm,iproc-static-adc" + +- adc-syscon: Handle of the syscon node defining the physical base address of + the controller and the length of the memory mapped region. + +- #io-channel-cells = <1>; as the ADC has multiple outputs; + refer to Documentation/devicetree/bindings/iio/iio-bindings.txt for details. + +- io-channel-ranges: + refer to Documentation/devicetree/bindings/iio/iio-bindings.txt for details. + +- clocks: Clock used for this block. + +- clock-names: Clock name should be given as tsc_clk. + +- interrupts: interrupt line number. + +For example: + + ts_adc_syscon: ts_adc_syscon@180a6000 { + compatible = "brcm,iproc-ts-adc-syscon","syscon"; + reg = <0x180a6000 0xc30>; + }; + + adc: adc@180a6000 { + compatible = "brcm,iproc-static-adc"; + adc-syscon = <&ts_adc_syscon>; + #io-channel-cells = <1>; + io-channel-ranges; + clocks = <&asiu_clks BCM_CYGNUS_ASIU_ADC_CLK>; + clock-names = "tsc_clk"; + interrupts = ; + status = "disabled"; + }; diff --git a/Documentation/devicetree/bindings/iio/adc/max1363.txt b/Documentation/devicetree/bindings/iio/adc/max1363.txt new file mode 100644 index 000000000000..94a9011dd860 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/max1363.txt @@ -0,0 +1,63 @@ +* Maxim 1x3x/136x/116xx Analog to Digital Converter (ADC) + +The node for this driver must be a child node of an I2C controller, hence +all mandatory properties for your controller must be specified. See directory: + + Documentation/devicetree/bindings/i2c + +for more details.
+ +Required properties: + - compatible: Should be one of + "maxim,max1361" + "maxim,max1362" + "maxim,max1363" + "maxim,max1364" + "maxim,max1036" + "maxim,max1037" + "maxim,max1038" + "maxim,max1039" + "maxim,max1136" + "maxim,max1137" + "maxim,max1138" + "maxim,max1139" + "maxim,max1236" + "maxim,max1237" + "maxim,max1238" + "maxim,max1239" + "maxim,max11600" + "maxim,max11601" + "maxim,max11602" + "maxim,max11603" + "maxim,max11604" + "maxim,max11605" + "maxim,max11606" + "maxim,max11607" + "maxim,max11608" + "maxim,max11609" + "maxim,max11610" + "maxim,max11611" + "maxim,max11612" + "maxim,max11613" + "maxim,max11614" + "maxim,max11615" + "maxim,max11616" + "maxim,max11617" + "maxim,max11644" + "maxim,max11645" + "maxim,max11646" + "maxim,max11647" + - reg: Should contain the ADC I2C address + +Optional properties: + - vcc-supply: phandle to the regulator that provides power to the ADC. + - vref-supply: phandle to the regulator for ADC reference voltage. + - interrupts: IRQ line for the ADC. If not used the driver will use + polling. + +Example: +adc: max11644@36 { + compatible = "maxim,max11644"; + reg = <0x36>; + vref-supply = <&adc_vref>; +}; diff --git a/Documentation/devicetree/bindings/iio/chemical/atlas,ec-sm.txt b/Documentation/devicetree/bindings/iio/chemical/atlas,ec-sm.txt new file mode 100644 index 000000000000..2962bd9a2b3d --- /dev/null +++ b/Documentation/devicetree/bindings/iio/chemical/atlas,ec-sm.txt @@ -0,0 +1,22 @@ +* Atlas Scientific EC-SM OEM sensor + +http://www.atlas-scientific.com/_files/_datasheets/_oem/EC_oem_datasheet.pdf + +Required properties: + + - compatible: must be "atlas,ec-sm" + - reg: the I2C address of the sensor + - interrupt-parent: should be the phandle for the interrupt controller + - interrupts: the sole interrupt generated by the device + + Refer to interrupt-controller/interrupts.txt for generic interrupt client + node bindings. + +Example: + +atlas@64 { + compatible = "atlas,ec-sm"; + reg = <0x64>; + interrupt-parent = <&gpio1>; + interrupts = <16 2>; +}; diff --git a/Documentation/devicetree/bindings/iio/dac/ad5755.txt b/Documentation/devicetree/bindings/iio/dac/ad5755.txt new file mode 100644 index 000000000000..f0bbd7e1029b --- /dev/null +++ b/Documentation/devicetree/bindings/iio/dac/ad5755.txt @@ -0,0 +1,124 @@ +* Analog Devices AD5755 IIO Multi-Channel DAC Linux Driver + +Required properties: + - compatible: Has to contain one of the following: + adi,ad5755 + adi,ad5755-1 + adi,ad5757 + adi,ad5735 + adi,ad5737 + + - reg: spi chip select number for the device + - spi-cpha or spi-cpol: these are the only SPI modes that are supported + +Recommended properties: + - spi-max-frequency: Definition as per + Documentation/devicetree/bindings/spi/spi-bus.txt + +Optional properties: +See include/dt-bindings/iio/ad5755.h + - adi,ext-dc-dc-compenstation-resistor: boolean, set if the hardware has an + external resistor and thereby bypasses + the internal compensation resistor. + - adi,dc-dc-phase: + Valid values for DC DC Phase control are: + 0: All dc-to-dc converters clock on the same edge. + 1: Channel A and Channel B clock on the same edge, + Channel C and Channel D clock on opposite edges. + 2: Channel A and Channel C clock on the same edge, + Channel B and Channel D clock on opposite edges. + 3: Channel A, Channel B, Channel C, and Channel D + clock 90 degrees out of phase from each other.
+ - adi,dc-dc-freq-hz: + Valid values for the DC DC frequency are [Hz]: + 250000 + 410000 + 650000 + - adi,dc-dc-max-microvolt: + Valid values for the maximum allowed Vboost voltage supplied by + the dc-to-dc converter are: + 23000000 + 24500000 + 27000000 + 29500000 + +Optional for every channel: + - adi,mode: + Valid values for DAC modes are: + 0: 0 V to 5 V voltage range. + 1: 0 V to 10 V voltage range. + 2: Plus minus 5 V voltage range. + 3: Plus minus 10 V voltage range. + 4: 4 mA to 20 mA current range. + 5: 0 mA to 20 mA current range. + 6: 0 mA to 24 mA current range. + - adi,ext-current-sense-resistor: boolean, set if the hardware has an external + current sense resistor. + - adi,enable-voltage-overrange: boolean, enable voltage overrange + - adi,slew: Array of slew rate settings; should contain 3 fields: + 1: Should be either 0 or 1 in order to enable or disable the slew rate. + 2: Slew rate settings: + Valid values for the slew rate update frequency: + 64000 + 32000 + 16000 + 8000 + 4000 + 2000 + 1000 + 500 + 250 + 125 + 64 + 32 + 16 + 8 + 4 + 0 + 3: Slew step size: + Valid values for the step size LSBs: + 1 + 2 + 4 + 16 + 32 + 64 + 128 + 256 + +Example: +dac@0 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "adi,ad5755"; + reg = <0>; + spi-max-frequency = <1000000>; + spi-cpha; + adi,dc-dc-phase = <0>; + adi,dc-dc-freq-hz = <410000>; + adi,dc-dc-max-microvolt = <23000000>; + channel@0 { + reg = <0>; + adi,mode = <4>; + adi,ext-current-sense-resistor; + adi,slew = <0 64000 1>; + }; + channel@1 { + reg = <1>; + adi,mode = <4>; + adi,ext-current-sense-resistor; + adi,slew = <0 64000 1>; + }; + channel@2 { + reg = <2>; + adi,mode = <4>; + adi,ext-current-sense-resistor; + adi,slew = <0 64000 1>; + }; + channel@3 { + reg = <3>; + adi,mode = <4>; + adi,ext-current-sense-resistor; + adi,slew = <0 64000 1>; + }; +}; diff --git a/Documentation/devicetree/bindings/iio/pressure/bmp085.txt b/Documentation/devicetree/bindings/iio/pressure/bmp085.txt index d7a6deb6b21e..c7198a03c906 100644 --- a/Documentation/devicetree/bindings/iio/pressure/bmp085.txt +++ b/Documentation/devicetree/bindings/iio/pressure/bmp085.txt @@ -1,7 +1,11 @@ -BMP085/BMP18x digital pressure sensors +BMP085/BMP18x/BMP28x digital pressure sensors Required properties: -- compatible: bosch,bmp085 +- compatible: must be one of: + "bosch,bmp085" + "bosch,bmp180" + "bosch,bmp280" + "bosch,bme280" Optional properties: - chip-id: configurable chip id for non-default chip revisions @@ -10,6 +14,10 @@ Optional properties: value range is 0-3 with rising sensitivity.
- interrupt-parent: should be the phandle for the interrupt controller - interrupts: interrupt mapping for IRQ +- reset-gpios: a GPIO line handling reset of the sensor: as the line is + active low, it should be marked GPIO_ACTIVE_LOW (see gpio/gpio.txt) +- vddd-supply: digital voltage regulator (see regulator/regulator.txt) +- vdda-supply: analog voltage regulator (see regulator/regulator.txt) Example: @@ -21,4 +29,7 @@ pressure@77 { default-oversampling = <2>; interrupt-parent = <&gpio0>; interrupts = <25 IRQ_TYPE_EDGE_RISING>; + reset-gpios = <&gpio0 26 GPIO_ACTIVE_LOW>; + vddd-supply = <&foo>; + vdda-supply = <&bar>; }; diff --git a/Documentation/devicetree/bindings/iio/st-sensors.txt b/Documentation/devicetree/bindings/iio/st-sensors.txt index 5844cf72862d..e41fe340162b 100644 --- a/Documentation/devicetree/bindings/iio/st-sensors.txt +++ b/Documentation/devicetree/bindings/iio/st-sensors.txt @@ -64,3 +64,4 @@ Pressure sensors: - st,lps001wp-press - st,lps25h-press - st,lps331ap-press +- st,lps22hb-press diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt index 793c20ff8fcc..5393e2a45a42 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt @@ -21,6 +21,7 @@ Main node required properties: "arm,pl390" "arm,tc11mp-gic" "brcm,brahma-b15-gic" + "nvidia,tegra210-agic" "qcom,msm-8660-qgic" "qcom,msm-qgic2" - interrupt-controller : Identifies the node as an interrupt controller @@ -68,7 +69,7 @@ Optional "ic_clk" (for "arm,arm11mp-gic") "PERIPHCLKEN" (for "arm,cortex-a15-gic") "PERIPHCLK", "PERIPHCLKEN" (for "arm,cortex-a9-gic") - "clk" (for "arm,gic-400") + "clk" (for "arm,gic-400" and "nvidia,tegra210-agic") "gclk" (for "arm,pl390") - power-domains : A phandle and PM domain specifier as defined by bindings of diff --git a/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-vic.txt b/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-vic.txt new file mode 100644 index 000000000000..6c6e85324b9d --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-vic.txt @@ -0,0 +1,22 @@ +Aspeed Vectored Interrupt Controller + +These bindings are for the Aspeed AST2400 interrupt controller register layout. +The SoC has a legacy register layout, but this driver does not support that +mode of operation. + +Required properties: + +- compatible : should be "aspeed,ast2400-vic". + +- interrupt-controller : Identifies the node as an interrupt controller +- #interrupt-cells : Specifies the number of cells needed to encode an + interrupt source. The value shall be 1. + +Example: + + vic: interrupt-controller@1e6c0080 { + compatible = "aspeed,ast2400-vic"; + interrupt-controller; + #interrupt-cells = <1>; + reg = <0x1e6c0080 0x80>; + }; diff --git a/Documentation/devicetree/bindings/media/mediatek-vcodec.txt b/Documentation/devicetree/bindings/media/mediatek-vcodec.txt new file mode 100644 index 000000000000..59a47a5b924b --- /dev/null +++ b/Documentation/devicetree/bindings/media/mediatek-vcodec.txt @@ -0,0 +1,59 @@ +Mediatek Video Codec + +Mediatek Video Codec is the video codec hardware present in Mediatek SoCs which +supports high resolution encoding functionality. + +Required properties: +- compatible : "mediatek,mt8173-vcodec-enc" for encoder +- reg : Physical base address of the video codec registers and length of + memory mapped region.
+- interrupts : interrupt number to the CPU. +- mediatek,larb : must contain the local arbiters in the current SoCs. +- clocks : list of clock specifiers, corresponding to entries in + the clock-names property. +- clock-names: encoder must contain "venc_sel_src", "venc_sel", + "venc_lt_sel_src", "venc_lt_sel". +- iommus : should point to the respective IOMMU block with master port as + argument, see Documentation/devicetree/bindings/iommu/mediatek,iommu.txt + for details. +- mediatek,vpu : the node of video processor unit + +Example: +vcodec_enc: vcodec@0x18002000 { + compatible = "mediatek,mt8173-vcodec-enc"; + reg = <0 0x18002000 0 0x1000>, /*VENC_SYS*/ + <0 0x19002000 0 0x1000>; /*VENC_LT_SYS*/ + interrupts = , + ; + mediatek,larb = <&larb3>, + <&larb5>; + iommus = <&iommu M4U_PORT_VENC_RCPU>, + <&iommu M4U_PORT_VENC_REC>, + <&iommu M4U_PORT_VENC_BSDMA>, + <&iommu M4U_PORT_VENC_SV_COMV>, + <&iommu M4U_PORT_VENC_RD_COMV>, + <&iommu M4U_PORT_VENC_CUR_LUMA>, + <&iommu M4U_PORT_VENC_CUR_CHROMA>, + <&iommu M4U_PORT_VENC_REF_LUMA>, + <&iommu M4U_PORT_VENC_REF_CHROMA>, + <&iommu M4U_PORT_VENC_NBM_RDMA>, + <&iommu M4U_PORT_VENC_NBM_WDMA>, + <&iommu M4U_PORT_VENC_RCPU_SET2>, + <&iommu M4U_PORT_VENC_REC_FRM_SET2>, + <&iommu M4U_PORT_VENC_BSDMA_SET2>, + <&iommu M4U_PORT_VENC_SV_COMA_SET2>, + <&iommu M4U_PORT_VENC_RD_COMA_SET2>, + <&iommu M4U_PORT_VENC_CUR_LUMA_SET2>, + <&iommu M4U_PORT_VENC_CUR_CHROMA_SET2>, + <&iommu M4U_PORT_VENC_REF_LUMA_SET2>, + <&iommu M4U_PORT_VENC_REC_CHROMA_SET2>; + mediatek,vpu = <&vpu>; + clocks = <&topckgen CLK_TOP_VENCPLL_D2>, + <&topckgen CLK_TOP_VENC_SEL>, + <&topckgen CLK_TOP_UNIVPLL1_D2>, + <&topckgen CLK_TOP_VENC_LT_SEL>; + clock-names = "venc_sel_src", + "venc_sel", + "venc_lt_sel_src", + "venc_lt_sel"; + }; diff --git a/Documentation/devicetree/bindings/media/mediatek-vpu.txt b/Documentation/devicetree/bindings/media/mediatek-vpu.txt new file mode 100644 index 000000000000..2a5bac37f9a2 --- /dev/null +++ b/Documentation/devicetree/bindings/media/mediatek-vpu.txt @@ -0,0 +1,31 @@ +* Mediatek Video Processor Unit + +The Video Processor Unit is a HW video controller. It controls the HW codec, +including H.264/VP8/VP9 decode, H.264/VP8 encode and the image processor (scale/rotate/color convert). + +Required properties: + - compatible: "mediatek,mt8173-vpu" + - reg: Must contain an entry for each entry in reg-names. + - reg-names: Must include the following entries: + "tcm": tcm base + "cfg_reg": Main configuration registers base + - interrupts: interrupt number to the CPU. + - clocks : clock name from clock manager + - clock-names: must be "main".
It is the main clock of the VPU. + +Optional properties: + - memory-region: phandle to a node describing memory (see + Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt) + to be used for VPU extended memory; if not present, the VPU may be located + anywhere in memory + +Example: + vpu: vpu@10020000 { + compatible = "mediatek,mt8173-vpu"; + reg = <0 0x10020000 0 0x30000>, + <0 0x10050000 0 0x100>; + reg-names = "tcm", "cfg_reg"; + interrupts = ; + clocks = <&topckgen TOP_SCP_SEL>; + clock-names = "main"; + }; diff --git a/Documentation/devicetree/bindings/media/renesas,fcp.txt b/Documentation/devicetree/bindings/media/renesas,fcp.txt new file mode 100644 index 000000000000..6a12960609d8 --- /dev/null +++ b/Documentation/devicetree/bindings/media/renesas,fcp.txt @@ -0,0 +1,32 @@ +Renesas R-Car Frame Compression Processor (FCP) +----------------------------------------------- + +The FCP is a companion module of video processing modules in the Renesas R-Car +Gen3 SoCs. It provides data compression and decompression, data caching, and +conversion of AXI transactions in order to reduce the memory bandwidth. + +There are three types of FCP: FCP for Codec (FCPC), FCP for VSP (FCPV) and FCP +for FDP (FCPF). Their configuration and behaviour depend on the module they +are paired with. These DT bindings currently support the FCPV only. + + - compatible: Must be one or more of the following + + - "renesas,r8a7795-fcpv" for R8A7795 (R-Car H3) compatible 'FCP for VSP' + - "renesas,fcpv" for generic compatible 'FCP for VSP' + + When compatible with the generic version, nodes must list the + SoC-specific version corresponding to the platform first, followed by the + family-specific and/or generic versions. + + - reg: the register base and size for the device registers + - clocks: Reference to the functional clock + + +Device node example +------------------- + + fcpvd1: fcp@fea2f000 { + compatible = "renesas,r8a7795-fcpv", "renesas,fcpv"; + reg = <0 0xfea2f000 0 0x200>; + clocks = <&cpg CPG_MOD 602>; + }; diff --git a/Documentation/devicetree/bindings/media/renesas,vsp1.txt b/Documentation/devicetree/bindings/media/renesas,vsp1.txt index 627405abd144..9b695bcbf219 100644 --- a/Documentation/devicetree/bindings/media/renesas,vsp1.txt +++ b/Documentation/devicetree/bindings/media/renesas,vsp1.txt @@ -14,6 +14,11 @@ Required properties: - interrupts: VSP interrupt specifier. - clocks: A phandle + clock-specifier pair for the VSP functional clock. +Optional properties: + + - renesas,fcp: A phandle referencing the FCP that handles memory accesses + for the VSP. Not needed on Gen2, mandatory on Gen3. + Example: R8A7790 (R-Car H2) VSP1-S node diff --git a/Documentation/devicetree/bindings/media/s5p-cec.txt b/Documentation/devicetree/bindings/media/s5p-cec.txt new file mode 100644 index 000000000000..925ab4d72eaa --- /dev/null +++ b/Documentation/devicetree/bindings/media/s5p-cec.txt @@ -0,0 +1,31 @@ +* Samsung HDMI CEC driver + +The HDMI CEC module is present in Samsung SoCs and its purpose is to +handle communication between HDMI connected devices over the CEC bus. + +Required properties: + - compatible : value should be the following + "samsung,s5p-cec" + + - reg : Physical base address of the IP registers and length of memory + mapped region. + + - interrupts : HDMI CEC interrupt number to the CPU. + - clocks : from common clock binding: handle to HDMI CEC clock. + - clock-names : from common clock binding: must contain "hdmicec", + corresponding to entry in the clocks property.
+ - samsung,syscon-phandle - phandle to the PMU system controller + +Example: + +hdmicec: cec@100B0000 { + compatible = "samsung,s5p-cec"; + reg = <0x100B0000 0x200>; + interrupts = <0 114 0>; + clocks = <&clock CLK_HDMI_CEC>; + clock-names = "hdmicec"; + samsung,syscon-phandle = <&pmu_system_controller>; + pinctrl-names = "default"; + pinctrl-0 = <&hdmi_cec>; + status = "okay"; +}; diff --git a/Documentation/devicetree/bindings/media/s5p-mfc.txt b/Documentation/devicetree/bindings/media/s5p-mfc.txt index 2d5787eac91a..92c94f5ecbf1 100644 --- a/Documentation/devicetree/bindings/media/s5p-mfc.txt +++ b/Documentation/devicetree/bindings/media/s5p-mfc.txt @@ -21,15 +21,18 @@ Required properties: - clock-names : from common clock binding: must contain "mfc", corresponding to entry in the clocks property. - - samsung,mfc-r : Base address of the first memory bank used by MFC - for DMA contiguous memory allocation and its size. - - - samsung,mfc-l : Base address of the second memory bank used by MFC - for DMA contiguous memory allocation and its size. - Optional properties: - power-domains : power-domain property defined with a phandle to respective power domain. + - memory-region : from reserved memory binding: phandles to two reserved + memory regions, first is for the "left" mfc memory bus interfaces, + second is for the "right" mfc memory bus, used when no SYSMMU + support is available + +Obsolete properties: + - samsung,mfc-r, samsung,mfc-l : support removed, please use memory-region + property instead + Example: SoC specific DT entry: @@ -43,9 +46,29 @@ mfc: codec@13400000 { clock-names = "mfc"; }; +Reserved memory specific DT entry for given board (see reserved memory binding +for more information): + +reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + mfc_left: region@51000000 { + compatible = "shared-dma-pool"; + no-map; + reg = <0x51000000 0x800000>; + }; + + mfc_right: region@43000000 { + compatible = "shared-dma-pool"; + no-map; + reg = <0x43000000 0x800000>; + }; +}; + Board specific DT entry: codec@13400000 { - samsung,mfc-r = <0x43000000 0x800000>; - samsung,mfc-l = <0x51000000 0x800000>; + memory-region = <&mfc_left>, <&mfc_right>; }; diff --git a/Documentation/devicetree/bindings/mfd/axp20x.txt b/Documentation/devicetree/bindings/mfd/axp20x.txt index d20b1034e967..585a95546288 100644 --- a/Documentation/devicetree/bindings/mfd/axp20x.txt +++ b/Documentation/devicetree/bindings/mfd/axp20x.txt @@ -22,6 +22,11 @@ Optional properties: AXP152/20X: range: 750-1875, Default: 1.5 MHz AXP22X/80X: range: 1800-4050, Default: 3 MHz +- x-powers,drive-vbus-en: axp221 / axp223 only boolean, set this when the + N_VBUSEN pin is used as an output pin to control an external + regulator to drive the OTG VBus, rather than as an input pin + which signals whether the board is driving OTG VBus or not. + - -supply: a phandle to the regulator supply node. May be omitted if inputs are unregulated, such as using the IPSOUT output from the PMIC.
@@ -79,6 +84,7 @@ ELDO3 : LDO : eldoin-supply : shared supply LDO_IO0 : LDO : ips-supply : GPIO 0 LDO_IO1 : LDO : ips-supply : GPIO 1 RTC_LDO : LDO : ips-supply : always on +DRIVEVBUS : Enable output : drivevbus-supply : external regulator AXP809 regulators, type, and corresponding input supply names: diff --git a/Documentation/devicetree/bindings/mfd/rn5t618.txt b/Documentation/devicetree/bindings/mfd/rn5t618.txt index 937785a3eddc..9e6770b105c9 100644 --- a/Documentation/devicetree/bindings/mfd/rn5t618.txt +++ b/Documentation/devicetree/bindings/mfd/rn5t618.txt @@ -1,18 +1,21 @@ -* Ricoh RN5T618 PMIC +* Ricoh RN5T567/RN5T618 PMIC -Ricoh RN5T618 is a power management IC which integrates 3 step-down -DCDC converters, 7 low-dropout regulators, a Li-ion battery charger, -fuel gauge, ADC, GPIOs and a watchdog timer. It can be controlled -through a I2C interface. +Ricoh RN5T567/RN5T618 is a power management IC family which integrates +3 to 4 step-down DCDC converters, 7 low-dropout regulators, GPIOs and +a watchdog timer. The RN5T618 provides additionally a Li-ion battery +charger, fuel gauge and an ADC. It can be controlled through an I2C +interface. Required properties: - - compatible: should be "ricoh,rn5t618" + - compatible: must be one of + "ricoh,rn5t567" + "ricoh,rn5t618" - reg: the I2C slave address of the device Sub-nodes: - regulators: the node is required if the regulator functionality is - needed. The valid regulator names are: DCDC1, DCDC2, DCDC3, LDO1, - LDO2, LDO3, LDO4, LDO5, LDORTC1 and LDORTC2. + needed. The valid regulator names are: DCDC1, DCDC2, DCDC3, DCDC4 + (RN5T567), LDO1, LDO2, LDO3, LDO4, LDO5, LDORTC1 and LDORTC2. The common bindings for each individual regulator can be found in: Documentation/devicetree/bindings/regulator/regulator.txt diff --git a/Documentation/devicetree/bindings/misc/ramoops.txt b/Documentation/devicetree/bindings/misc/ramoops.txt new file mode 100644 index 000000000000..cd02cec67d38 --- /dev/null +++ b/Documentation/devicetree/bindings/misc/ramoops.txt @@ -0,0 +1,48 @@ +Ramoops oops/panic logger +========================= + +ramoops provides persistent RAM storage for oops and panics, so they can be +recovered after a reboot. It is a backend to pstore, so this node is named +"ramoops" after the backend, rather than "pstore" which is the subsystem. + +Parts of this storage may be set aside for other persistent log buffers, such +as kernel log messages, or for optional ECC error-correction data. The total +size of these optional buffers must fit in the reserved region. + +Any remaining space will be used for a circular buffer of oops and panic +records. These records have a configurable size, with a size of 0 indicating +that they should be disabled. + +At least one of "record-size", "console-size", "ftrace-size", or "pmsg-size" +must be set non-zero, but are otherwise optional as listed below. 
+ + +Required properties: + +- compatible: must be "ramoops" + +- memory-region: phandle to a region of memory that is preserved between + reboots + + +Optional properties: + +- ecc-size: enables ECC support and specifies ECC buffer size in bytes + (defaults to 0: no ECC) + +- record-size: maximum size in bytes of each dump done on oops/panic + (defaults to 0: disabled) + +- console-size: size in bytes of log buffer reserved for kernel messages + (defaults to 0: disabled) + +- ftrace-size: size in bytes of log buffer reserved for function tracing and + profiling (defaults to 0: disabled) + +- pmsg-size: size in bytes of log buffer reserved for userspace messages + (defaults to 0: disabled) + +- unbuffered: if present, use unbuffered mappings to map the reserved region + (defaults to buffered mappings) + +- no-dump-oops: if present, only dump panics (defaults to panics and oops) diff --git a/Documentation/devicetree/bindings/net/apm-xgene-mdio.txt b/Documentation/devicetree/bindings/net/apm-xgene-mdio.txt new file mode 100644 index 000000000000..78722d74cea8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/apm-xgene-mdio.txt @@ -0,0 +1,37 @@ +APM X-Gene SoC MDIO node + +MDIO node is defined to describe on-chip MDIO controller. + +Required properties: + - compatible: Must be "apm,xgene-mdio-rgmii" or "apm,xgene-mdio-xfi" + - #address-cells: Must be <1>. + - #size-cells: Must be <0>. + - reg: Address and length of the register set + - clocks: Reference to the clock entry + +For the phys on the mdio bus, there must be a node with the following fields: + - compatible: PHY identifier. Please refer to ./phy.txt for the format. + - reg: The ID number for the phy. + +Example: + + mdio: mdio@17020000 { + compatible = "apm,xgene-mdio-rgmii"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x17020000 0x0 0xd100>; + clocks = <&menetclk 0>; + }; + + /* Board-specific peripheral configurations */ + &mdio { + menetphy: phy@3 { + reg = <0x3>; + }; + sgenet0phy: phy@4 { + reg = <0x4>; + }; + sgenet1phy: phy@5 { + reg = <0x5>; + }; + }; diff --git a/Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.txt b/Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.txt new file mode 100644 index 000000000000..dfe287a5d6f2 --- /dev/null +++ b/Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.txt @@ -0,0 +1,59 @@ +Properties for an MDIO bus multiplexer found in Broadcom iProc based SoCs. + +This MDIO bus multiplexer defines buses that can be internal as well as +external to the SoC and can accept MDIO transactions compatible with Clause 22 +or Clause 45. When a child bus is selected, these two properties need to be +selected as well to generate the desired MDIO transaction on the appropriate bus. + +Required properties in addition to the generic multiplexer properties: + +MDIO multiplexer node: +- compatible: brcm,mdio-mux-iproc. + +Every non-ethernet PHY requires a compatible string so that it can be probed +based on it.
+ +Additional information regarding generic multiplexer properties can be found +at Documentation/devicetree/bindings/net/mdio-mux.txt + + +for example: + mdio_mux_iproc: mdio-mux@6602023c { + compatible = "brcm,mdio-mux-iproc"; + reg = <0x6602023c 0x14>; + #address-cells = <1>; + #size-cells = <0>; + + mdio@0 { + reg = <0x0>; + #address-cells = <1>; + #size-cells = <0>; + + pci_phy0: pci-phy@0 { + compatible = "brcm,ns2-pcie-phy"; + reg = <0x0>; + #phy-cells = <0>; + }; + }; + + mdio@7 { + reg = <0x7>; + #address-cells = <1>; + #size-cells = <0>; + + pci_phy1: pci-phy@0 { + compatible = "brcm,ns2-pcie-phy"; + reg = <0x0>; + #phy-cells = <0>; + }; + }; + mdio@10 { + reg = <0x10>; + #address-cells = <1>; + #size-cells = <0>; + + gphy0: eth-phy@10 { + reg = <0x10>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt new file mode 100644 index 000000000000..22a6f10bab05 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt @@ -0,0 +1,96 @@ +Renesas R-Car CAN FD controller Device Tree Bindings +---------------------------------------------------- + +Required properties: +- compatible: Must contain one or more of the following: + - "renesas,rcar-gen3-canfd" for R-Car Gen3 compatible controller. + - "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller. + + When compatible with the generic version, nodes must list the + SoC-specific version corresponding to the platform first, followed by the + family-specific and/or generic versions. + +- reg: physical base address and size of the R-Car CAN FD register map. +- interrupts: interrupt specifier for the Global & Channel interrupts +- clocks: phandles and clock specifiers for 3 clock inputs. +- clock-names: 3 clock input name strings: "fck", "canfd", "can_clk". +- pinctrl-0: pin control group to be used for this controller. +- pinctrl-names: must be "default". + +Required child nodes: +The controller supports two channels and each is represented as a child node. +The names of the child nodes are "channel0" and "channel1" respectively. Each +child node supports the "status" property only, which is used to +enable/disable the respective channel. + +Required properties for "renesas,r8a7795-canfd" compatible: +In the R8A7795 SoC, the canfd clock is a div6 clock and can be used by both CAN +and CAN FD controller at the same time. It needs to be scaled to the maximum +frequency if any of these controllers use it. This is done using the +below properties. + +- assigned-clocks: phandle of canfd clock. +- assigned-clock-rates: maximum frequency of this clock. + +Optional property: +The controller can operate in either CAN FD only mode (default) or +Classical CAN only mode. The mode is global to both the channels. In order to +enable the latter, define the following optional property. + - renesas,no-can-fd: puts the controller in Classical CAN only mode. + +Example +------- + +SoC common .dtsi file: + + canfd: can@e66c0000 { + compatible = "renesas,r8a7795-canfd", + "renesas,rcar-gen3-canfd"; + reg = <0 0xe66c0000 0 0x8000>; + interrupts = , + ; + clocks = <&cpg CPG_MOD 914>, + <&cpg CPG_CORE R8A7795_CLK_CANFD>, + <&can_clk>; + clock-names = "fck", "canfd", "can_clk"; + assigned-clocks = <&cpg CPG_CORE R8A7795_CLK_CANFD>; + assigned-clock-rates = <40000000>; + power-domains = <&cpg>; + status = "disabled"; + + channel0 { + status = "disabled"; + }; + + channel1 { + status = "disabled"; + }; + }; + +Board specific .dts file: + +E.g.
below enables Channel 1 alone in the board in Classical CAN only mode. + +&canfd { + pinctrl-0 = <&canfd1_pins>; + pinctrl-names = "default"; + renesas,no-can-fd; + status = "okay"; + + channel1 { + status = "okay"; + }; +}; + +E.g. below enables Channel 0 alone in the board using External clock +as fCAN clock. + +&canfd { + pinctrl-0 = <&canfd0_pins &can_clk_pins>; + pinctrl-names = "default"; + status = "okay"; + + channel0 { + status = "okay"; + }; +}; diff --git a/Documentation/devicetree/bindings/net/cirrus,cs89x0.txt b/Documentation/devicetree/bindings/net/cirrus,cs89x0.txt new file mode 100644 index 000000000000..c070076bacb9 --- /dev/null +++ b/Documentation/devicetree/bindings/net/cirrus,cs89x0.txt @@ -0,0 +1,13 @@ +* Cirrus Logic CS8900/CS8920 Network Controller + +Required properties: +- compatible : Should be "cirrus,cs8900" or "cirrus,cs8920". +- reg : Address and length of the IO space. +- interrupts : Should contain the controller interrupt line. + +Examples: + eth0: eth@10000000 { + compatible = "cirrus,cs8900"; + reg = <0x10000000 0x400>; + interrupts = <10>; + }; diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index 0ae06491b430..5ad439f30135 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -15,7 +15,6 @@ Required properties: - cpdma_channels : Specifies number of channels in CPDMA - ale_entries : Specifies No of entries ALE can hold - bd_ram_size : Specifies internal descriptor RAM size -- rx_descs : Specifies number of Rx descriptors - mac_control : Specifies Default MAC control register content for the specific platform - slaves : Specifies number for slaves diff --git a/Documentation/devicetree/bindings/net/davinci-mdio.txt b/Documentation/devicetree/bindings/net/davinci-mdio.txt index 0369e25aabd2..621156ca4ffd 100644 --- a/Documentation/devicetree/bindings/net/davinci-mdio.txt +++ b/Documentation/devicetree/bindings/net/davinci-mdio.txt @@ -2,7 +2,10 @@ TI SoC Davinci/Keystone2 MDIO Controller Device Tree Bindings --------------------------------------------------- Required properties: -- compatible : Should be "ti,davinci_mdio" or "ti,keystone_mdio" +- compatible : Should be "ti,davinci_mdio" + and "ti,keystone_mdio" for Keystone 2 SoCs + and "ti,cpsw-mdio" for am335x, am472x, am57xx/dra7, dm814x SoCs + and "ti,am4372-mdio" for am472x SoC - reg : physical base address and size of the davinci mdio registers map - bus_freq : Mdio Bus frequency diff --git a/Documentation/devicetree/bindings/net/dsa/b53.txt b/Documentation/devicetree/bindings/net/dsa/b53.txt new file mode 100644 index 000000000000..d6c6e41648d4 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/b53.txt @@ -0,0 +1,97 @@ +Broadcom BCM53xx Ethernet switches +================================== + +Required properties: + +- compatible: For external switch chips, compatible string must be exactly one + of: "brcm,bcm5325" + "brcm,bcm53115" + "brcm,bcm53125" + "brcm,bcm53128" + "brcm,bcm5365" + "brcm,bcm5395" + "brcm,bcm5397" + "brcm,bcm5398" + + For the BCM5310x SoCs with an integrated switch, must be one of: + "brcm,bcm53010-srab" + "brcm,bcm53011-srab" + "brcm,bcm53012-srab" + "brcm,bcm53018-srab" + "brcm,bcm53019-srab" and the mandatory "brcm,bcm5301x-srab" string + + For the BCM585xx/586XX/88312 SoCs with an integrated switch, must be one of: + "brcm,bcm58522-srab" + "brcm,bcm58523-srab" + "brcm,bcm58525-srab" + "brcm,bcm58622-srab" + "brcm,bcm58623-srab" + 
"brcm,bcm58625-srab" + "brcm,bcm88312-srab" and the mandatory "brcm,nsp-srab string + + For the BCM63xx/33xx SoCs with an integrated switch, must be one of: + "brcm,bcm3384-switch" + "brcm,bcm6328-switch" + "brcm,bcm6368-switch" and the mandatory "brcm,bcm63xx-switch" + +See Documentation/devicetree/bindings/dsa/dsa.txt for a list of additional +required and optional properties. + +Examples: + +Ethernet switch connected via MDIO to the host, CPU port wired to eth0: + + eth0: ethernet@10001000 { + compatible = "brcm,unimac"; + reg = <0x10001000 0x1000>; + + fixed-link { + speed = <1000>; + duplex-full; + }; + }; + + mdio0: mdio@10000000 { + compatible = "brcm,unimac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + switch0: ethernet-switch@30 { + compatible = "brcm,bcm53125"; + #address-cells = <1>; + #size-cells = <0>; + + ports { + port0@0 { + reg = <0>; + label = "lan1"; + }; + + port1@1 { + reg = <1>; + label = "lan2"; + }; + + port5@5 { + reg = <5>; + label = "cable-modem"; + fixed-link { + speed = <1000>; + duplex-full; + }; + phy-mode = "rgmii-txid"; + }; + + port8@8 { + reg = <8>; + label = "cpu"; + fixed-link { + speed = <1000>; + duplex-full; + }; + phy-mode = "rgmii-txid"; + ethernet = <ð0>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt index 9f4807f90c31..9bbbe7f87d67 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.txt +++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt @@ -1,5 +1,279 @@ -Marvell Distributed Switch Architecture Device Tree Bindings ------------------------------------------------------------- +Distributed Switch Architecture Device Tree Bindings +---------------------------------------------------- + +Two bindings exist, one of which has been deprecated due to +limitations. + +Current Binding +--------------- + +Switches are true Linux devices and can be probes by any means. Once +probed, they register to the DSA framework, passing a node +pointer. This node is expected to fulfil the following binding, and +may contain additional properties as required by the device it is +embedded within. + +Required properties: + +- ports : A container for child nodes representing switch ports. + +Optional properties: + +- dsa,member : A two element list indicates which DSA cluster, and position + within the cluster a switch takes. <0 0> is cluster 0, + switch 0. <0 1> is cluster 0, switch 1. <1 0> is cluster 1, + switch 0. A switch not part of any cluster (single device + hanging off a CPU port) must not specify this property + +The ports container has the following properties + +Required properties: + +- #address-cells : Must be 1 +- #size-cells : Must be 0 + +Each port children node must have the following mandatory properties: +- reg : Describes the port address in the switch +- label : Describes the label associated with this port, which + will become the netdev name. Special labels are + "cpu" to indicate a CPU port and "dsa" to + indicate an uplink/downlink port between switches in + the cluster. + +A port labelled "dsa" has the following mandatory property: + +- link : Should be a list of phandles to other switch's DSA + port. This port is used as the outgoing port + towards the phandle ports. The full routing + information must be given, not just the one hop + routes to neighbouring switches. + +A port labelled "cpu" has the following mandatory property: + +- ethernet : Should be a phandle to a valid Ethernet device node. 
+ This host device is what the switch port is + connected to. + +Port child nodes may also contain the following optional standardised +properties, described in binding documents: + +- phy-handle : Phandle to a PHY on an MDIO bus. See + Documentation/devicetree/bindings/net/ethernet.txt + for details. + +- phy-mode : See + Documentation/devicetree/bindings/net/ethernet.txt + for details. + +- fixed-link : Fixed-link subnode describing a link to a non-MDIO + managed entity. See + Documentation/devicetree/bindings/net/fixed-link.txt + for details. + +Example + +The following example shows three switches on three MDIO busses, +linked into one DSA cluster. + +&mdio1 { + #address-cells = <1>; + #size-cells = <0>; + + switch0: switch0@0 { + compatible = "marvell,mv88e6085"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + dsa,member = <0 0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "lan0"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + }; + + port@2 { + reg = <2>; + label = "lan2"; + }; + + switch0port5: port@5 { + reg = <5>; + label = "dsa"; + phy-mode = "rgmii-txid"; + link = <&switch1port6 + &switch2port9>; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + port@6 { + reg = <6>; + label = "cpu"; + ethernet = <&fec1>; + fixed-link { + speed = <100>; + full-duplex; + }; + }; + }; + }; +}; + +&mdio2 { + #address-cells = <1>; + #size-cells = <0>; + + switch1: switch1@0 { + compatible = "marvell,mv88e6085"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + dsa,member = <0 1>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "lan3"; + phy-handle = <&switch1phy0>; + }; + + port@1 { + reg = <1>; + label = "lan4"; + phy-handle = <&switch1phy1>; + }; + + port@2 { + reg = <2>; + label = "lan5"; + phy-handle = <&switch1phy2>; + }; + + switch1port5: port@5 { + reg = <5>; + label = "dsa"; + link = <&switch2port9>; + phy-mode = "rgmii-txid"; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + switch1port6: port@6 { + reg = <6>; + label = "dsa"; + phy-mode = "rgmii-txid"; + link = <&switch0port5>; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + mdio-bus { + #address-cells = <1>; + #size-cells = <0>; + switch1phy0: switch1phy0@0 { + reg = <0>; + }; + switch1phy1: switch1phy0@1 { + reg = <1>; + }; + switch1phy2: switch1phy0@2 { + reg = <2>; + }; + }; + }; +}; + +&mdio4 { + #address-cells = <1>; + #size-cells = <0>; + + switch2: switch2@0 { + compatible = "marvell,mv88e6085"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + dsa,member = <0 2>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "lan6"; + }; + + port@1 { + reg = <1>; + label = "lan7"; + }; + + port@2 { + reg = <2>; + label = "lan8"; + }; + + port@3 { + reg = <3>; + label = "optical3"; + fixed-link { + speed = <1000>; + full-duplex; + link-gpios = <&gpio6 2 + GPIO_ACTIVE_HIGH>; + }; + }; + + port@4 { + reg = <4>; + label = "optical4"; + fixed-link { + speed = <1000>; + full-duplex; + link-gpios = <&gpio6 3 + GPIO_ACTIVE_HIGH>; + }; + }; + + switch2port9: port@9 { + reg = <9>; + label = "dsa"; + phy-mode = "rgmii-txid"; + link = <&switch1port5 + &switch0port5>; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + }; +}; + +Deprecated Binding +------------------ + +The deprecated binding makes use of a platform device to represent the +switches. 
The switches themselves are not Linux devices, and make use +of an MDIO bus for management. Required properties: - compatible : Should be "marvell,dsa" diff --git a/Documentation/devicetree/bindings/net/hisilicon-femac-mdio.txt b/Documentation/devicetree/bindings/net/hisilicon-femac-mdio.txt new file mode 100644 index 000000000000..23a39a309d17 --- /dev/null +++ b/Documentation/devicetree/bindings/net/hisilicon-femac-mdio.txt @@ -0,0 +1,22 @@ +Hisilicon Fast Ethernet MDIO Controller interface + +Required properties: +- compatible: should be "hisilicon,hisi-femac-mdio". +- reg: address and length of the register set for the device. +- clocks: A phandle to the reference clock for this device. + +- PHY subnode: inherits from phy binding [1] +[1] Documentation/devicetree/bindings/net/phy.txt + +Example: +mdio: mdio@10091100 { + compatible = "hisilicon,hisi-femac-mdio"; + reg = <0x10091100 0x10>; + clocks = <&crg HI3516CV300_MDIO_CLK>; + #address-cells = <1>; + #size-cells = <0>; + + phy0: phy@1 { + reg = <1>; + }; +}; diff --git a/Documentation/devicetree/bindings/net/hisilicon-femac.txt b/Documentation/devicetree/bindings/net/hisilicon-femac.txt new file mode 100644 index 000000000000..d11af5ecace8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/hisilicon-femac.txt @@ -0,0 +1,39 @@ +Hisilicon Fast Ethernet MAC controller + +Required properties: +- compatible: should contain one of the following version strings: + * "hisilicon,hisi-femac-v1" + * "hisilicon,hisi-femac-v2" + and the soc string "hisilicon,hi3516cv300-femac". +- reg: specifies base physical address(s) and size of the device registers. + The first region is the MAC core register base and size. + The second region is the global MAC control register. +- interrupts: should contain the MAC interrupt. +- clocks: A phandle to the MAC main clock. +- resets: should contain the phandle to the MAC reset signal(required) and + the PHY reset signal(optional). +- reset-names: should contain the reset signal name "mac"(required) + and "phy"(optional). +- mac-address: see ethernet.txt [1]. +- phy-mode: see ethernet.txt [1]. +- phy-handle: see ethernet.txt [1]. +- hisilicon,phy-reset-delays-us: triplet of delays if PHY reset signal given. + The 1st cell is reset pre-delay in micro seconds. + The 2nd cell is reset pulse in micro seconds. + The 3rd cell is reset post-delay in micro seconds. + +[1] Documentation/devicetree/bindings/net/ethernet.txt + +Example: + hisi_femac: ethernet@10090000 { + compatible = "hisilicon,hi3516cv300-femac","hisilicon,hisi-femac-v2"; + reg = <0x10090000 0x1000>,<0x10091300 0x200>; + interrupts = <12>; + clocks = <&crg HI3518EV200_ETH_CLK>; + resets = <&crg 0xec 0>,<&crg 0xec 3>; + reset-names = "mac","phy"; + mac-address = [00 00 00 00 00 00]; + phy-mode = "mii"; + phy-handle = <&phy0>; + hisilicon,phy-reset-delays-us = <10000 20000 20000>; + }; diff --git a/Documentation/devicetree/bindings/net/keystone-netcp.txt b/Documentation/devicetree/bindings/net/keystone-netcp.txt index b30ab6b5cbfa..04ba1dc34fd6 100644 --- a/Documentation/devicetree/bindings/net/keystone-netcp.txt +++ b/Documentation/devicetree/bindings/net/keystone-netcp.txt @@ -2,7 +2,7 @@ This document describes the device tree bindings associated with the keystone network coprocessor(NetCP) driver support. The network coprocessor (NetCP) is a hardware accelerator that processes -Ethernet packets. NetCP has a gigabit Ethernet (GbE) subsytem with a ethernet +Ethernet packets. 
NetCP has a gigabit Ethernet (GbE) subsystem with a ethernet switch sub-module to send and receive packets. NetCP also includes a packet accelerator (PA) module to perform packet classification operations such as header matching, and packet modification operations such as checksum diff --git a/Documentation/devicetree/bindings/net/mdio-mux.txt b/Documentation/devicetree/bindings/net/mdio-mux.txt index 491f5bd55203..f58571f36570 100644 --- a/Documentation/devicetree/bindings/net/mdio-mux.txt +++ b/Documentation/devicetree/bindings/net/mdio-mux.txt @@ -5,11 +5,12 @@ numbered uniquely in a device dependent manner. The nodes for an MDIO bus multiplexer/switch will have one child node for each child bus. Required properties: -- mdio-parent-bus : phandle to the parent MDIO bus. - #address-cells = <1>; - #size-cells = <0>; Optional properties: +- mdio-parent-bus : phandle to the parent MDIO bus. + - Other properties specific to the multiplexer/switch hardware. Required properties for child nodes: diff --git a/Documentation/devicetree/bindings/net/micrel.txt b/Documentation/devicetree/bindings/net/micrel.txt index 87496a8c64ab..8d157f0295a5 100644 --- a/Documentation/devicetree/bindings/net/micrel.txt +++ b/Documentation/devicetree/bindings/net/micrel.txt @@ -35,3 +35,13 @@ Optional properties: supported clocks: - KSZ8021, KSZ8031, KSZ8081, KSZ8091: "rmii-ref": The RMII reference input clock. Used to determine the XI input clock. + + - micrel,fiber-mode: If present the PHY is configured to operate in fiber mode + + Some PHYs, such as the KSZ8041FTL variant, support fiber mode, enabled + by the FXEN boot strapping pin. It can't be determined from the PHY + registers whether the PHY is in fiber mode, so this boolean device tree + property can be used to describe it. + + In fiber mode, auto-negotiation is disabled and the PHY can only work in + 100base-fx (full and half duplex) modes. diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt index 93eac7ce1446..cccd945fc45b 100644 --- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt +++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt @@ -3,7 +3,8 @@ Rockchip SoC RK3288 10/100/1000 Ethernet driver(GMAC) The device node has following properties. Required properties: - - compatible: Can be one of "rockchip,rk3288-gmac", "rockchip,rk3368-gmac" + - compatible: Can be one of "rockchip,rk3228-gmac", "rockchip,rk3288-gmac", + "rockchip,rk3368-gmac" - reg: addresses and length of the register sets for the device. - interrupts: Should contain the GMAC interrupts. - interrupt-names: Should contain the interrupt names "macirq". diff --git a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt index 72d82d684342..2e68a3cd8513 100644 --- a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt +++ b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt @@ -17,9 +17,26 @@ Required properties: Optional properties: altr,emac-splitter: Should be the phandle to the emac splitter soft IP node if DWMAC controller is connected emac splitter. 
+phy-mode: The phy mode the ethernet operates in +altr,sgmii-to-sgmii-converter: phandle to the TSE SGMII converter + +This device node has additional phandle dependency, the sgmii converter: + +Required properties: + - compatible : Should be altr,gmii-to-sgmii-2.0 + - reg-names : Should be "eth_tse_control_port" Example: +gmii_to_sgmii_converter: phy@0x100000240 { + compatible = "altr,gmii-to-sgmii-2.0"; + reg = <0x00000001 0x00000240 0x00000008>, + <0x00000001 0x00000200 0x00000040>; + reg-names = "eth_tse_control_port"; + clocks = <&sgmii_1_clk_0 &emac1 1 &sgmii_clk_125 &sgmii_clk_125>; + clock-names = "tse_pcs_ref_clk_clock_connection", "tse_rx_cdr_refclk"; +}; + gmac0: ethernet@ff700000 { compatible = "altr,socfpga-stmmac", "snps,dwmac-3.70a", "snps,dwmac"; altr,sysmgr-syscon = <&sysmgr 0x60 0>; @@ -30,4 +47,6 @@ gmac0: ethernet@ff700000 { mac-address = [00 00 00 00 00 00];/* Filled in by U-Boot */ clocks = <&emac_0_clk>; clock-names = "stmmaceth"; + phy-mode = "sgmii"; + altr,gmii-to-sgmii-converter = <&gmii_to_sgmii_converter>; }; diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt index 95816c5fc589..41b49e6075f5 100644 --- a/Documentation/devicetree/bindings/net/stmmac.txt +++ b/Documentation/devicetree/bindings/net/stmmac.txt @@ -47,6 +47,9 @@ Optional properties: supported by this device instance - snps,perfect-filter-entries: Number of perfect filter entries supported by this device instance +- snps,ps-speed: port selection speed that can be passed to the core when + PCS is supported. For example, this is used in case of SGMII + and MAC2MAC connection. - AXI BUS Mode parameters: below the list of all the parameters to program the AXI register inside the DMA module: - snps,lpi_en: enable Low Power Interface diff --git a/Documentation/devicetree/bindings/net/wireless/ti,wlcore,spi.txt b/Documentation/devicetree/bindings/net/wireless/ti,wlcore,spi.txt index 9180724e182c..8f9ced076fe1 100644 --- a/Documentation/devicetree/bindings/net/wireless/ti,wlcore,spi.txt +++ b/Documentation/devicetree/bindings/net/wireless/ti,wlcore,spi.txt @@ -1,19 +1,30 @@ -* Texas Instruments wl1271 wireless lan controller +* Texas Instruments wl12xx/wl18xx wireless lan controller -The wl1271 chip can be connected via SPI or via SDIO. This +The wl12xx/wl18xx chips can be connected via SPI or via SDIO. This document describes the binding for the SPI connected chip. Required properties: -- compatible : Should be "ti,wl1271" +- compatible : Should be one of the following: + * "ti,wl1271" + * "ti,wl1273" + * "ti,wl1281" + * "ti,wl1283" + * "ti,wl1801" + * "ti,wl1805" + * "ti,wl1807" + * "ti,wl1831" + * "ti,wl1835" + * "ti,wl1837" - reg : Chip select address of device - spi-max-frequency : Maximum SPI clocking speed of device in Hz -- ref-clock-frequency : Reference clock frequency - interrupt-parent, interrupts : Should contain parameters for 1 interrupt line. Interrupt parameters: parent, line number, type. 
-- vwlan-supply : Point the node of the regulator that powers/enable the wl1271 chip +- vwlan-supply : Point the node of the regulator that powers/enable the + wl12xx/wl18xx chip Optional properties: +- ref-clock-frequency : Reference clock frequency (should be set for wl12xx) - clock-xtal : boolean, clock is generated from XTAL - Please consult Documentation/devicetree/bindings/spi/spi-bus.txt @@ -21,16 +32,28 @@ Optional properties: Examples: +For wl12xx family: &spi1 { - wl1271@1 { + wlcore: wlcore@1 { compatible = "ti,wl1271"; - reg = <1>; spi-max-frequency = <48000000>; - clock-xtal; - ref-clock-frequency = <38400000>; interrupt-parent = <&gpio3>; interrupts = <8 IRQ_TYPE_LEVEL_HIGH>; vwlan-supply = <&vwlan_fixed>; + clock-xtal; + ref-clock-frequency = <38400000>; + }; +}; + +For wl18xx family: +&spi0 { + wlcore: wlcore@0 { + compatible = "ti,wl1835"; + reg = <0>; + spi-max-frequency = <48000000>; + interrupt-parent = <&gpio0>; + interrupts = <27 IRQ_TYPE_EDGE_RISING>; + vwlan-supply = <&vwlan_fixed>; }; }; diff --git a/Documentation/devicetree/bindings/phy/brcm,mdio-mux-bus-pci.txt b/Documentation/devicetree/bindings/phy/brcm,mdio-mux-bus-pci.txt new file mode 100644 index 000000000000..5b51007c6f24 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/brcm,mdio-mux-bus-pci.txt @@ -0,0 +1,27 @@ +* Broadcom NS2 PCIe PHY binding document + +Required bus properties: +- reg: MDIO Bus number for the MDIO interface +- #address-cells: must be 1 +- #size-cells: must be 0 + +Required PHY properties: +- compatible: should be "brcm,ns2-pcie-phy" +- reg: MDIO Phy ID for the MDIO interface +- #phy-cells: must be 0 + +This is a child bus node of "brcm,mdio-mux-iproc" node. + +Example: + +mdio@0 { + reg = <0x0>; + #address-cells = <1>; + #size-cells = <0>; + + pci_phy0: pci-phy@0 { + compatible = "brcm,ns2-pcie-phy"; + reg = <0x0>; + #phy-cells = <0>; + }; +}; diff --git a/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt b/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt index d0231209d846..6ccce09d8bbf 100644 --- a/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt +++ b/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt @@ -5,6 +5,7 @@ Required properties: "brcm,bcm7425-sata-phy" "brcm,bcm7445-sata-phy" "brcm,iproc-ns2-sata-phy" + "brcm,iproc-nsp-sata-phy" "brcm,phy-sata3" - address-cells: should be 1 - size-cells: should be 0 @@ -22,7 +23,8 @@ Sub-nodes required properties: Sub-nodes optional properties: - brcm,enable-ssc: use spread spectrum clocking (SSC) on this port - This property is not applicable for "brcm,iproc-ns2-sata-phy". + This property is not applicable for "brcm,iproc-ns2-sata-phy" and + "brcm,iproc-nsp-sata-phy". Example: sata-phy@f0458100 { diff --git a/Documentation/devicetree/bindings/phy/phy-da8xx-usb.txt b/Documentation/devicetree/bindings/phy/phy-da8xx-usb.txt new file mode 100644 index 000000000000..c26478be391b --- /dev/null +++ b/Documentation/devicetree/bindings/phy/phy-da8xx-usb.txt @@ -0,0 +1,40 @@ +TI DA8xx/OMAP-L1xx/AM18xx USB PHY + +Required properties: + - compatible: must be "ti,da830-usb-phy". + - #phy-cells: must be 1. + +This device controls the PHY for both the USB 1.1 OHCI and USB 2.0 OTG +controllers on DA8xx SoCs. Consumers of this device should use index 0 for +the USB 2.0 phy device and index 1 for the USB 1.1 phy device. + +It also requires a "syscon" node with compatible = "ti,da830-cfgchip", "syscon" +to access the CFGCHIP2 register. 
+ +Example: + + cfgchip: cfgchip@1417c { + compatible = "ti,da830-cfgchip", "syscon"; + reg = <0x1417c 0x14>; + }; + + usb_phy: usb-phy { + compatible = "ti,da830-usb-phy"; + #phy-cells = <1>; + }; + + usb20: usb@200000 { + compatible = "ti,da830-musb"; + reg = <0x200000 0x1000>; + interrupts = <58>; + phys = <&usb_phy 0>; + phy-names = "usb-phy"; + }; + + usb11: usb@225000 { + compatible = "ti,da830-ohci"; + reg = <0x225000 0x1000>; + interrupts = <59>; + phys = <&usb_phy 1>; + phy-names = "usb-phy"; + }; diff --git a/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt index 68498d560354..cc6be9680a6d 100644 --- a/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt +++ b/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt @@ -5,11 +5,13 @@ Required properties: "rockchip,rk3066a-usb-phy" "rockchip,rk3188-usb-phy" "rockchip,rk3288-usb-phy" - - rockchip,grf : phandle to the syscon managing the "general - register files" - #address-cells: should be 1 - #size-cells: should be 0 +Deprecated properties: + - rockchip,grf : phandle to the syscon managing the "general + register files" - phy should be a child of the GRF instead + Sub-nodes: Each PHY should be represented as a sub-node. @@ -28,14 +30,19 @@ Optional Properties: Example: -usbphy: phy { - compatible = "rockchip,rk3288-usb-phy"; - rockchip,grf = <&grf>; - #address-cells = <1>; - #size-cells = <0>; +grf: syscon@ff770000 { + compatible = "rockchip,rk3288-grf", "syscon", "simple-mfd"; + +... + + usbphy: phy { + compatible = "rockchip,rk3288-usb-phy"; + #address-cells = <1>; + #size-cells = <0>; - usbphy0: usb-phy0 { - #phy-cells = <0>; - reg = <0x320>; + usbphy0: usb-phy0 { + #phy-cells = <0>; + reg = <0x320>; + }; }; }; diff --git a/Documentation/devicetree/bindings/power/max8903-charger.txt b/Documentation/devicetree/bindings/power/max8903-charger.txt new file mode 100644 index 000000000000..f0f4e12b076e --- /dev/null +++ b/Documentation/devicetree/bindings/power/max8903-charger.txt @@ -0,0 +1,25 @@ +Maxim Semiconductor MAX8903 Battery Charger bindings + +Required properties: +- compatible: "maxim,max8903" for MAX8903 Battery Charger +- dok-gpios: Valid DC power has been detected (active low, input), optional if uok-gpios is provided +- uok-gpios: Valid USB power has been detected (active low, input), optional if dok-gpios is provided + +Optional properties: +- cen-gpios: Charge enable pin (active low, output) +- chg-gpios: Charger status pin (active low, input) +- flt-gpios: Fault pin (active low, output) +- dcm-gpios: Current limit mode setting (DC=1 or USB=0, output) +- usus-gpios: USB suspend pin (active high, output) + + +Example: + + max8903-charger { + compatible = "maxim,max8903"; + dok-gpios = <&gpio2 3 GPIO_ACTIVE_LOW>; + flt-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; + chg-gpios = <&gpio3 15 GPIO_ACTIVE_LOW>; + cen-gpios = <&gpio2 5 GPIO_ACTIVE_LOW>; + status = "okay"; + }; diff --git a/Documentation/devicetree/bindings/reset/brcm,bcm21664-resetmgr.txt b/Documentation/devicetree/bindings/power/reset/brcm,bcm21664-resetmgr.txt similarity index 100% rename from Documentation/devicetree/bindings/reset/brcm,bcm21664-resetmgr.txt rename to Documentation/devicetree/bindings/power/reset/brcm,bcm21664-resetmgr.txt diff --git a/Documentation/devicetree/bindings/power/reset/reboot-mode.txt b/Documentation/devicetree/bindings/power/reset/reboot-mode.txt new file mode 100644 index 000000000000..de34f27d509e --- /dev/null +++ 
b/Documentation/devicetree/bindings/power/reset/reboot-mode.txt
@@ -0,0 +1,25 @@
+Generic reboot mode core map driver
+
+This driver gets the reboot mode arguments and calls the write
+interface to store the magic value in a special register
+or RAM. The bootloader can then read it and take a different
+action according to the argument stored.
+
+All mode properties are vendor specific; they are an indication to tell
+the bootloader what to do when the system reboots, and should be named
+as mode-xxx = <magic> (xxx is the mode name, magic should be a non-zero value).
+
+For example, modes common on Android platforms:
+- mode-normal: Normal reboot mode, the system reboots with the command "reboot".
+- mode-recovery: Android Recovery mode, a mode used to format the device or update a new image.
+- mode-bootloader: Android fastboot mode, a mode used to re-flash partitions on Android based devices.
+- mode-loader: A bootloader mode, a mode used to download an image on the Rockchip platform,
+  usually used in development.
+
+Example:
+	reboot-mode {
+		mode-normal = <BOOT_NORMAL>;
+		mode-recovery = <BOOT_RECOVERY>;
+		mode-bootloader = <BOOT_FASTBOOT>;
+		mode-loader = <BOOT_BL_DOWNLOAD>;
+	};
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.txt b/Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.txt
new file mode 100644
index 000000000000..f7ce1d8af04a
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.txt
@@ -0,0 +1,35 @@
+SYSCON reboot mode driver
+
+This driver gets the reboot mode magic value from the reboot-mode driver
+and stores it in a SYSCON mapped register. The bootloader can then
+read it and take a different action according to the magic
+value stored.
+
+This DT node should be represented as a sub-node of a "syscon", "simple-mfd"
+node.
+
+Required properties:
+- compatible: should be "syscon-reboot-mode"
+- offset: offset in the register map for the storage register (in bytes)
+
+Optional property:
+- mask: bits mask of the bits in the register to store the reboot mode magic value,
+  default set to 0xffffffff if missing.
+
+The rest of the properties should follow the generic reboot-mode description
+found in reboot-mode.txt
+
+Example:
+	pmu: pmu@20004000 {
+		compatible = "rockchip,rk3066-pmu", "syscon", "simple-mfd";
+		reg = <0x20004000 0x100>;
+
+		reboot-mode {
+			compatible = "syscon-reboot-mode";
+			offset = <0x40>;
+			mode-normal = <BOOT_NORMAL>;
+			mode-recovery = <BOOT_RECOVERY>;
+			mode-bootloader = <BOOT_FASTBOOT>;
+			mode-loader = <BOOT_BL_DOWNLOAD>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/power_supply/axp20x_usb_power.txt b/Documentation/devicetree/bindings/power_supply/axp20x_usb_power.txt
index 862f4a49dc49..f1d7beec45bf 100644
--- a/Documentation/devicetree/bindings/power_supply/axp20x_usb_power.txt
+++ b/Documentation/devicetree/bindings/power_supply/axp20x_usb_power.txt
@@ -1,7 +1,8 @@
 AXP20x USB power supply
 
 Required Properties:
--compatible: "x-powers,axp202-usb-power-supply"
+-compatible: One of: "x-powers,axp202-usb-power-supply"
+                     "x-powers,axp221-usb-power-supply"
 
 This node is a subnode of the axp20x PMIC.
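A minimal usage sketch for the binding above, assuming an AXP221 PMIC parent node; the &axp22x label and the usb_power_supply node name are illustrative assumptions, not defined by the binding:

	&axp22x {
		usb_power_supply: usb_power_supply {
			compatible = "x-powers,axp221-usb-power-supply";
		};
	};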
diff --git a/Documentation/devicetree/bindings/regulator/da9210.txt b/Documentation/devicetree/bindings/regulator/da9210.txt
index 7aa9b1fa6b21..58065ca9e3b4 100644
--- a/Documentation/devicetree/bindings/regulator/da9210.txt
+++ b/Documentation/devicetree/bindings/regulator/da9210.txt
@@ -1,4 +1,4 @@
-* Dialog Semiconductor DA9210 Voltage Regulator
+* Dialog Semiconductor DA9210 Multi-phase 12A DCDC BUCK Converter
 
 Required properties:
 
@@ -18,8 +18,12 @@ Example:
 	compatible = "dlg,da9210";
 	reg = <0x68>;
 
-	regulator-min-microvolt = <900000>;
-	regulator-max-microvolt = <1000000>;
+	interrupt-parent = <...>;
+	interrupts = <...>;
+
+	regulator-min-microvolt = <300000>;
+	regulator-max-microvolt = <1570000>;
+	regulator-min-microamp = <1600000>;
+	regulator-max-microamp = <4600000>;
 	regulator-boot-on;
-	regulator-always-on;
 };
diff --git a/Documentation/devicetree/bindings/regulator/da9211.txt b/Documentation/devicetree/bindings/regulator/da9211.txt
index c620493e8dbe..0f2a6f8fcafd 100644
--- a/Documentation/devicetree/bindings/regulator/da9211.txt
+++ b/Documentation/devicetree/bindings/regulator/da9211.txt
@@ -1,7 +1,8 @@
-* Dialog Semiconductor DA9211/DA9213/DA9215 Voltage Regulator
+* Dialog Semiconductor DA9211/DA9212/DA9213/DA9214/DA9215 Voltage Regulator
 
 Required properties:
-- compatible: "dlg,da9211" or "dlg,da9213" or "dlg,da9215"
+- compatible: "dlg,da9211" or "dlg,da9212" or "dlg,da9213"
+  or "dlg,da9214" or "dlg,da9215"
 - reg: I2C slave address, usually 0x68.
 - interrupts: the interrupt outputs of the controller
 - regulators: A node that houses a sub-node for each regulator within the
@@ -21,6 +22,25 @@ Example 1) DA9211
 	reg = <0x68>;
 	interrupts = <3 27>;
 
+	regulators {
+		BUCKA {
+			regulator-name = "VBUCKA";
+			regulator-min-microvolt = < 300000>;
+			regulator-max-microvolt = <1570000>;
+			regulator-min-microamp = <2000000>;
+			regulator-max-microamp = <5000000>;
+			enable-gpios = <&gpio 27 0>;
+		};
+	};
+};
+
+Example 2) DA9212
+
+	pmic: da9212@68 {
+		compatible = "dlg,da9212";
+		reg = <0x68>;
+		interrupts = <3 27>;
+
 	regulators {
 		BUCKA {
 			regulator-name = "VBUCKA";
@@ -41,12 +61,30 @@ Example 1) DA9211
 	};
 };
 
-Example 2) DA9213
+Example 3) DA9213
 	pmic: da9213@68 {
 		compatible = "dlg,da9213";
 		reg = <0x68>;
 		interrupts = <3 27>;
 
+		regulators {
+			BUCKA {
+				regulator-name = "VBUCKA";
+				regulator-min-microvolt = < 300000>;
+				regulator-max-microvolt = <1570000>;
+				regulator-min-microamp = <3000000>;
+				regulator-max-microamp = <6000000>;
+				enable-gpios = <&gpio 27 0>;
+			};
+		};
+	};
+
+Example 4) DA9214
+	pmic: da9214@68 {
+		compatible = "dlg,da9214";
+		reg = <0x68>;
+		interrupts = <3 27>;
+
 		regulators {
 			BUCKA {
 				regulator-name = "VBUCKA";
@@ -67,8 +105,7 @@ Example 2) DA9213
 	};
 };
 
-
-Example 3) DA9215
+Example 5) DA9215
 	pmic: da9215@68 {
 		compatible = "dlg,da9215";
 		reg = <0x68>;
diff --git a/Documentation/devicetree/bindings/regulator/mt6323-regulator.txt b/Documentation/devicetree/bindings/regulator/mt6323-regulator.txt
new file mode 100644
index 000000000000..c35d878b0960
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/mt6323-regulator.txt
@@ -0,0 +1,237 @@
+Mediatek MT6323 Regulator Driver
+
+All voltage regulators are defined as subnodes of the regulators node. A list
+of regulators provided by this controller is defined as subnodes of the
+PMIC's node. Each regulator is named according to its regulator type,
+buck_<name> and ldo_<name>.
The definition for each of these nodes is defined +using the standard binding for regulators at +Documentation/devicetree/bindings/regulator/regulator.txt. + +The valid names for regulators are:: +BUCK: + buck_vproc, buck_vsys, buck_vpa +LDO: + ldo_vtcxo, ldo_vcn28, ldo_vcn33_bt, ldo_vcn33_wifi, ldo_va, ldo_vcama, + ldo_vio28, ldo_vusb, ldo_vmc, ldo_vmch, ldo_vemc3v3, ldo_vgp1, ldo_vgp2, + ldo_vgp3, ldo_vcn18, ldo_vsim1, ldo_vsim2, ldo_vrtc, ldo_vcamaf, ldo_vibr, + ldo_vrf18, ldo_vm, ldo_vio18, ldo_vcamd, ldo_vcamio + +Example: + + pmic: mt6323 { + mt6323regulator: regulators { + mt6323_vproc_reg: buck_vproc{ + regulator-name = "vproc"; + regulator-min-microvolt = < 700000>; + regulator-max-microvolt = <1350000>; + regulator-ramp-delay = <12500>; + regulator-always-on; + regulator-boot-on; + }; + + mt6323_vsys_reg: buck_vsys{ + regulator-name = "vsys"; + regulator-min-microvolt = <1400000>; + regulator-max-microvolt = <2987500>; + regulator-ramp-delay = <25000>; + regulator-always-on; + regulator-boot-on; + }; + + mt6323_vpa_reg: buck_vpa{ + regulator-name = "vpa"; + regulator-min-microvolt = < 500000>; + regulator-max-microvolt = <3650000>; + }; + + mt6323_vtcxo_reg: ldo_vtcxo{ + regulator-name = "vtcxo"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-enable-ramp-delay = <90>; + regulator-always-on; + regulator-boot-on; + }; + + mt6323_vcn28_reg: ldo_vcn28{ + regulator-name = "vcn28"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-enable-ramp-delay = <185>; + }; + + mt6323_vcn33_bt_reg: ldo_vcn33_bt{ + regulator-name = "vcn33_bt"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3600000>; + regulator-enable-ramp-delay = <185>; + }; + + mt6323_vcn33_wifi_reg: ldo_vcn33_wifi{ + regulator-name = "vcn33_wifi"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3600000>; + regulator-enable-ramp-delay = <185>; + }; + + mt6323_va_reg: ldo_va{ + regulator-name = "va"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-enable-ramp-delay = <216>; + regulator-always-on; + regulator-boot-on; + }; + + mt6323_vcama_reg: ldo_vcama{ + regulator-name = "vcama"; + regulator-min-microvolt = <1500000>; + regulator-max-microvolt = <2800000>; + regulator-enable-ramp-delay = <216>; + }; + + mt6323_vio28_reg: ldo_vio28{ + regulator-name = "vio28"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-enable-ramp-delay = <216>; + regulator-always-on; + regulator-boot-on; + }; + + mt6323_vusb_reg: ldo_vusb{ + regulator-name = "vusb"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <216>; + regulator-boot-on; + }; + + mt6323_vmc_reg: ldo_vmc{ + regulator-name = "vmc"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <36>; + regulator-boot-on; + }; + + mt6323_vmch_reg: ldo_vmch{ + regulator-name = "vmch"; + regulator-min-microvolt = <3000000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <36>; + regulator-boot-on; + }; + + mt6323_vemc3v3_reg: ldo_vemc3v3{ + regulator-name = "vemc3v3"; + regulator-min-microvolt = <3000000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <36>; + regulator-boot-on; + }; + + mt6323_vgp1_reg: ldo_vgp1{ + regulator-name = "vgp1"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3300000>; + 
regulator-enable-ramp-delay = <216>; + }; + + mt6323_vgp2_reg: ldo_vgp2{ + regulator-name = "vgp2"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3000000>; + regulator-enable-ramp-delay = <216>; + }; + + mt6323_vgp3_reg: ldo_vgp3{ + regulator-name = "vgp3"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1800000>; + regulator-enable-ramp-delay = <216>; + }; + + mt6323_vcn18_reg: ldo_vcn18{ + regulator-name = "vcn18"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-enable-ramp-delay = <216>; + }; + + mt6323_vsim1_reg: ldo_vsim1{ + regulator-name = "vsim1"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3000000>; + regulator-enable-ramp-delay = <216>; + }; + + mt6323_vsim2_reg: ldo_vsim2{ + regulator-name = "vsim2"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3000000>; + regulator-enable-ramp-delay = <216>; + }; + + mt6323_vrtc_reg: ldo_vrtc{ + regulator-name = "vrtc"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-always-on; + regulator-boot-on; + }; + + mt6323_vcamaf_reg: ldo_vcamaf{ + regulator-name = "vcamaf"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <216>; + }; + + mt6323_vibr_reg: ldo_vibr{ + regulator-name = "vibr"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <36>; + }; + + mt6323_vrf18_reg: ldo_vrf18{ + regulator-name = "vrf18"; + regulator-min-microvolt = <1825000>; + regulator-max-microvolt = <1825000>; + regulator-enable-ramp-delay = <187>; + }; + + mt6323_vm_reg: ldo_vm{ + regulator-name = "vm"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1800000>; + regulator-enable-ramp-delay = <216>; + regulator-always-on; + regulator-boot-on; + }; + + mt6323_vio18_reg: ldo_vio18{ + regulator-name = "vio18"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-enable-ramp-delay = <216>; + regulator-always-on; + regulator-boot-on; + }; + + mt6323_vcamd_reg: ldo_vcamd{ + regulator-name = "vcamd"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1800000>; + regulator-enable-ramp-delay = <216>; + }; + + mt6323_vcamio_reg: ldo_vcamio{ + regulator-name = "vcamio"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-enable-ramp-delay = <216>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/regulator/pwm-regulator.txt b/Documentation/devicetree/bindings/regulator/pwm-regulator.txt index ed936f0f34f2..dd6f59cf1455 100644 --- a/Documentation/devicetree/bindings/regulator/pwm-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/pwm-regulator.txt @@ -38,13 +38,18 @@ NB: To be clear, if voltage-table is provided, then the device will be used in Voltage Table Mode. If no voltage-table is provided, then the device will be used in Continuous Voltage Mode. +Optional properties: +-------------------- +- enable-gpios: GPIO to use to enable/disable the regulator + Any property defined as part of the core regulator binding can also be used. 
(See: ../regulator/regulator.txt) -Continuous Voltage Example: +Continuous Voltage With Enable GPIO Example: pwm_regulator { compatible = "pwm-regulator; pwms = <&pwm1 0 8448 0>; + enable-gpios = <&gpio0 23 GPIO_ACTIVE_HIGH>; regulator-min-microvolt = <1016000>; regulator-max-microvolt = <1114000>; regulator-name = "vdd_logic"; diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt index 46c6f3ed1a1c..0fa3b0fac129 100644 --- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt @@ -113,9 +113,9 @@ pm8916: l14, l15, l16, l17, l18 pm8941: - s1, s2, s3, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, - l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, lvs1, lvs2, lvs3, - mvs1, mvs2 + s1, s2, s3, s4, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, + l14, l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, lvs1, lvs2, lvs3, + 5vs1, 5vs2 pm8994: s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, l1, l2, l3, l4, l5, diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt index 07ccdaa68324..26542690b578 100644 --- a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt +++ b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt @@ -2,7 +2,8 @@ BCM2835 Random number generator Required properties: -- compatible : should be "brcm,bcm2835-rng" +- compatible : should be "brcm,bcm2835-rng" or "brcm,bcm-nsp-rng" or + "brcm,bcm5301x-rng" - reg : Specifies base physical address and size of the registers. Example: @@ -11,3 +12,8 @@ rng { compatible = "brcm,bcm2835-rng"; reg = <0x7e104000 0x10>; }; + +rng@18033000 { + compatible = "brcm,bcm-nsp-rng"; + reg = <0x18033000 0x14>; +}; diff --git a/Documentation/devicetree/bindings/serial/8250.txt b/Documentation/devicetree/bindings/serial/8250.txt index 936ab5b87324..f5561ac7e17e 100644 --- a/Documentation/devicetree/bindings/serial/8250.txt +++ b/Documentation/devicetree/bindings/serial/8250.txt @@ -42,6 +42,9 @@ Optional properties: - auto-flow-control: one way to enable automatic flow control support. The driver is allowed to detect support for the capability even without this property. +- {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD + line respectively. It will use specified GPIO instead of the peripheral + function pin for the UART feature. If unsure, don't specify this property. 
Note: * fsl,ns16550: @@ -63,3 +66,19 @@ Example: interrupts = <10>; reg-shift = <2>; }; + +Example for OMAP UART using GPIO-based modem control signals: + + uart4: serial@49042000 { + compatible = "ti,omap3-uart"; + reg = <0x49042000 0x400>; + interrupts = <80>; + ti,hwmods = "uart4"; + clock-frequency = <48000000>; + cts-gpios = <&gpio3 5 GPIO_ACTIVE_LOW>; + rts-gpios = <&gpio3 6 GPIO_ACTIVE_LOW>; + dtr-gpios = <&gpio1 12 GPIO_ACTIVE_LOW>; + dsr-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>; + dcd-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; + rng-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>; + }; diff --git a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt index 528c3b90f23c..1e4000d83aee 100644 --- a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt +++ b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt @@ -31,6 +31,8 @@ Required properties: - "renesas,hscif-r8a7794" for R8A7794 (R-Car E2) HSCIF compatible UART. - "renesas,scif-r8a7795" for R8A7795 (R-Car H3) SCIF compatible UART. - "renesas,hscif-r8a7795" for R8A7795 (R-Car H3) HSCIF compatible UART. + - "renesas,scif-r8a7796" for R8A7796 (R-Car M3-W) SCIF compatible UART. + - "renesas,hscif-r8a7796" for R8A7796 (R-Car M3-W) HSCIF compatible UART. - "renesas,scifa-sh73a0" for SH73A0 (SH-Mobile AG5) SCIFA compatible UART. - "renesas,scifb-sh73a0" for SH73A0 (SH-Mobile AG5) SCIFB compatible UART. - "renesas,rcar-gen1-scif" for R-Car Gen1 SCIF compatible UART, @@ -76,6 +78,10 @@ Optional properties: - dmas: Must contain a list of two references to DMA specifiers, one for transmission, and one for reception. - dma-names: Must contain a list of two DMA names, "tx" and "rx". + - {cts,dsr,dcd,rng,rts,dtr}-gpios: Specify GPIOs for modem lines, cfr. the + generic serial DT bindings in serial.txt. + - uart-has-rtscts: Indicates dedicated lines for RTS/CTS hardware flow + control, cfr. the generic serial DT bindings in serial.txt. Example: aliases { diff --git a/Documentation/devicetree/bindings/timer/oxsemi,rps-timer.txt b/Documentation/devicetree/bindings/timer/oxsemi,rps-timer.txt new file mode 100644 index 000000000000..3ca89cd1caef --- /dev/null +++ b/Documentation/devicetree/bindings/timer/oxsemi,rps-timer.txt @@ -0,0 +1,17 @@ +Oxford Semiconductor OXNAS SoCs Family RPS Timer +================================================ + +Required properties: +- compatible: Should be "oxsemi,ox810se-rps-timer" +- reg : Specifies base physical address and size of the registers. 
+- interrupts : The interrupts of the two timers +- clocks : The phandle of the timer clock source + +example: + +timer0: timer@200 { + compatible = "oxsemi,ox810se-rps-timer"; + reg = <0x200 0x40>; + clocks = <&rpsclk>; + interrupts = <4 5>; +}; diff --git a/Documentation/devicetree/bindings/timer/rockchip,rk3288-timer.txt b/Documentation/devicetree/bindings/timer/rockchip,rk-timer.txt similarity index 75% rename from Documentation/devicetree/bindings/timer/rockchip,rk3288-timer.txt rename to Documentation/devicetree/bindings/timer/rockchip,rk-timer.txt index 87f0b0042bae..a41b184d5538 100644 --- a/Documentation/devicetree/bindings/timer/rockchip,rk3288-timer.txt +++ b/Documentation/devicetree/bindings/timer/rockchip,rk-timer.txt @@ -1,7 +1,9 @@ -Rockchip rk3288 timer +Rockchip rk timer Required properties: -- compatible: shall be "rockchip,rk3288-timer" +- compatible: shall be one of: + "rockchip,rk3288-timer" - for rk3066, rk3036, rk3188, rk322x, rk3288, rk3368 + "rockchip,rk3399-timer" - for rk3399 - reg: base address of the timer register starting with TIMERS CONTROL register - interrupts: should contain the interrupts for Timer0 - clocks : must contain an entry for each entry in clock-names diff --git a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt index 1084e2bcbe1c..341dc67f3472 100644 --- a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt +++ b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt @@ -93,7 +93,7 @@ Example: phys = <&usb_phy0>; phy-names = "usb-phy"; vbus-supply = <®_usb0_vbus>; - gadget-itc-setting = <0x4>; /* 4 micro-frames */ + itc-setting = <0x4>; /* 4 micro-frames */ /* Incremental burst of unspecified length */ ahb-burst-config = <0x0>; tx-burst-size-dword = <0x10>; /* 64 bytes */ diff --git a/Documentation/devicetree/bindings/usb/usb-ohci.txt b/Documentation/devicetree/bindings/usb/usb-ohci.txt index 19233b7365e1..9df456968596 100644 --- a/Documentation/devicetree/bindings/usb/usb-ohci.txt +++ b/Documentation/devicetree/bindings/usb/usb-ohci.txt @@ -14,7 +14,7 @@ Optional properties: - clocks : a list of phandle + clock specifier pairs - phys : phandle + phy specifier pair - phy-names : "usb" -- resets : phandle + reset specifier pair +- resets : a list of phandle + reset specifier pairs Example: diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt index 122b7f4876bb..91ce82d5f0c4 100644 --- a/Documentation/dmaengine/provider.txt +++ b/Documentation/dmaengine/provider.txt @@ -323,7 +323,7 @@ supported. 
* device_resume - Resumes a transfer on the channel - This command should operate synchronously on the channel, - pausing right away the work of the given channel + resuming right away the work of the given channel * device_terminate_all - Aborts all the pending and ongoing transfers on the channel diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 75eea7ce3d7c..5a7386e38e2d 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -195,7 +195,9 @@ prototypes: int (*releasepage) (struct page *, int); void (*freepage)(struct page *); int (*direct_IO)(struct kiocb *, struct iov_iter *iter); + bool (*isolate_page) (struct page *, isolate_mode_t); int (*migratepage)(struct address_space *, struct page *, struct page *); + void (*putback_page) (struct page *); int (*launder_page)(struct page *); int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); int (*error_remove_page)(struct address_space *, struct page *); @@ -219,7 +221,9 @@ invalidatepage: yes releasepage: yes freepage: yes direct_IO: +isolate_page: yes migratepage: yes (both) +putback_page: yes launder_page: yes is_partially_uptodate: yes error_remove_page: yes @@ -544,13 +548,13 @@ subsequent truncate), and then return with VM_FAULT_LOCKED, and the page locked. The VM will unlock the page. ->map_pages() is called when VM asks to map easy accessible pages. -Filesystem should find and map pages associated with offsets from "pgoff" -till "max_pgoff". ->map_pages() is called with page table locked and must +Filesystem should find and map pages associated with offsets from "start_pgoff" +till "end_pgoff". ->map_pages() is called with page table locked and must not block. If it's not possible to reach a page without blocking, filesystem should skip it. Filesystem should use do_set_pte() to setup -page table entry. Pointer to entry associated with offset "pgoff" is -passed in "pte" field in vm_fault structure. Pointers to entries for other -offsets should be calculated relative to "pte". +page table entry. Pointer to entry associated with the page is passed in +"pte" field in fault_env structure. Pointers to entries for other offsets +should be calculated relative to "pte". ->page_mkwrite() is called when a previously read-only pte is about to become writeable. The filesystem again must ensure that there are diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt index ce4587d257d2..0c16a22521a8 100644 --- a/Documentation/filesystems/dax.txt +++ b/Documentation/filesystems/dax.txt @@ -49,6 +49,7 @@ These block devices may be used for inspiration: - axonram: Axon DDR2 device driver - brd: RAM backed block device driver - dcssblk: s390 dcss block device driver +- pmem: NVDIMM persistent memory driver Implementation Tips for Filesystem Writers @@ -75,8 +76,9 @@ calls to get_block() (for example by a page-fault racing with a read() or a write()) work correctly. 
These filesystems may be used for inspiration: -- ext2: the second extended filesystem, see Documentation/filesystems/ext2.txt -- ext4: the fourth extended filesystem, see Documentation/filesystems/ext4.txt +- ext2: see Documentation/filesystems/ext2.txt +- ext4: see Documentation/filesystems/ext4.txt +- xfs: see Documentation/filesystems/xfs.txt Handling Media Errors diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index e1c9f0849da6..ecd808088362 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -109,7 +109,9 @@ background_gc=%s Turn on/off cleaning operations, namely garbage disable_roll_forward Disable the roll-forward recovery routine norecovery Disable the roll-forward recovery routine, mounted read- only (i.e., -o ro,disable_roll_forward) -discard Issue discard/TRIM commands when a segment is cleaned. +discard/nodiscard Enable/disable real-time discard in f2fs, if discard is + enabled, f2fs will issue discard/TRIM commands when a + segment is cleaned. no_heap Disable heap-style segment allocation which finds free segments for data from the beginning of main area, while for node from the end of main area. @@ -151,6 +153,9 @@ noinline_data Disable the inline data feature, inline data feature is enabled by default. data_flush Enable data flushing before checkpoint in order to persist data of regular and symlink. +mode=%s Control block allocation mode which supports "adaptive" + and "lfs". In "lfs" mode, there should be no random + writes towards main area. ================================================================================ DEBUGFS ENTRIES diff --git a/Documentation/filesystems/ocfs2-online-filecheck.txt b/Documentation/filesystems/ocfs2-online-filecheck.txt index 1ab07860430d..139fab175c8a 100644 --- a/Documentation/filesystems/ocfs2-online-filecheck.txt +++ b/Documentation/filesystems/ocfs2-online-filecheck.txt @@ -5,12 +5,12 @@ This document will describe OCFS2 online file check feature. Introduction ============ -OCFS2 is often used in high-availaibility systems. However, OCFS2 usually +OCFS2 is often used in high-availability systems. However, OCFS2 usually converts the filesystem to read-only when encounters an error. This may not be necessary, since turning the filesystem read-only would affect other running processes as well, decreasing availability. Then, a mount option (errors=continue) is introduced, which would return the --EIO errno to the calling process and terminate furhter processing so that the +-EIO errno to the calling process and terminate further processing so that the filesystem is not corrupted further. The filesystem is not converted to read-only, and the problematic file's inode number is reported in the kernel log. The user can try to check/fix this file via online filecheck feature. @@ -44,7 +44,7 @@ There is a sysfs directory for each OCFS2 file system mounting: /sys/fs/ocfs2//filecheck -Here, indicates the name of OCFS2 volumn device which has been already +Here, indicates the name of OCFS2 volume device which has been already mounted. The file above would accept inode numbers. This could be used to communicate with kernel space, tell which file(inode number) will be checked or fixed. Currently, three operations are supported, which includes checking @@ -76,14 +76,14 @@ The output is like this: This time, the column indicates whether this fix is successful or not. 3. The record cache is used to store the history of check/fix results. 
It's -defalut size is 10, and can be adjust between the range of 10 ~ 100. You can +default size is 10, and can be adjust between the range of 10 ~ 100. You can adjust the size like this: # echo "" > /sys/fs/ocfs2//filecheck/set Fixing stuff ============ -On receivng the inode, the filesystem would read the inode and the +On receiving the inode, the filesystem would read the inode and the file metadata. In case of errors, the filesystem would fix the errors and report the problems it fixed in the kernel log. As a precautionary measure, the inode must first be checked for errors before performing a final fix. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index e8d00759bfa5..68080ad6a75e 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -436,6 +436,7 @@ Private_Dirty: 0 kB Referenced: 892 kB Anonymous: 0 kB AnonHugePages: 0 kB +ShmemPmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB @@ -464,6 +465,8 @@ accessed. a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE and a page is modified, the file page is replaced by a private anonymous copy. "AnonHugePages" shows the ammount of memory backed by transparent hugepage. +"ShmemPmdMapped" shows the ammount of shared (shmem/tmpfs) memory backed by +huge pages. "Shared_Hugetlb" and "Private_Hugetlb" show the ammounts of memory backed by hugetlbfs page which is *not* counted in "RSS" or "PSS" field for historical reasons. And these are not included in {Shared,Private}_{Clean,Dirty} field. @@ -725,7 +728,7 @@ IRQ, you can set it by doing: > echo 1 > /proc/irq/10/smp_affinity This means that only the first CPU will handle the IRQ, but you can also echo -5 which means that only the first and fourth CPU can handle the IRQ. +5 which means that only the first and third CPU can handle the IRQ. The contents of each smp_affinity file is the same by default: @@ -868,6 +871,9 @@ VmallocTotal: 112216 kB VmallocUsed: 428 kB VmallocChunk: 111088 kB AnonHugePages: 49152 kB +ShmemHugePages: 0 kB +ShmemPmdMapped: 0 kB + MemTotal: Total usable ram (i.e. 
physical ram minus a few reserved bits and the kernel binary code) @@ -912,6 +918,9 @@ MemAvailable: An estimate of how much memory is available for starting new AnonHugePages: Non-file backed huge pages mapped into userspace page tables Mapped: files which have been mmaped, such as libraries Shmem: Total memory used by shared memory (shmem) and tmpfs +ShmemHugePages: Memory used by shared memory (shmem) and tmpfs allocated + with huge pages +ShmemPmdMapped: Shared memory mapped into userspace with huge pages Slab: in-kernel data structures cache SReclaimable: Part of Slab, that might be reclaimed, such as caches SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c61a223ef3ff..900360cbcdae 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -592,9 +592,14 @@ struct address_space_operations { int (*releasepage) (struct page *, int); void (*freepage)(struct page *); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); + /* isolate a page for migration */ + bool (*isolate_page) (struct page *, isolate_mode_t); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); + /* put migration-failed page back to right list */ + void (*putback_page) (struct page *); int (*launder_page) (struct page *); + int (*is_partially_uptodate) (struct page *, unsigned long, unsigned long); void (*is_dirty_writeback) (struct page *, bool *, bool *); @@ -747,6 +752,10 @@ struct address_space_operations { and transfer data directly between the storage and the application's address space. + isolate_page: Called by the VM when isolating a movable non-lru page. + If page is successfully isolated, VM marks the page as PG_isolated + via __SetPageIsolated. + migrate_page: This is used to compact the physical memory usage. If the VM wants to relocate a page (maybe off a memory card that is signalling imminent failure) it will pass a new page @@ -754,6 +763,8 @@ struct address_space_operations { transfer any private data across and update any references that it has to the page. + putback_page: Called by the VM when isolated page's migration fails. + launder_page: Called before freeing a page - it writes back the dirty page. To prevent redirtying the page, it is kept locked during the whole operation. diff --git a/Documentation/gdb-kernel-debugging.txt b/Documentation/gdb-kernel-debugging.txt index 4ab7d43d0754..7050ce8794b9 100644 --- a/Documentation/gdb-kernel-debugging.txt +++ b/Documentation/gdb-kernel-debugging.txt @@ -139,27 +139,6 @@ Examples of using the Linux-provided gdb helpers start_comm = "swapper/2\000\000\000\000\000\000" } - o Dig into a radix tree data structure, such as the IRQ descriptors: - (gdb) print (struct irq_desc)$lx_radix_tree_lookup(irq_desc_tree, 18) - $6 = { - irq_common_data = { - state_use_accessors = 67584, - handler_data = 0x0 <__vectors_start>, - msi_desc = 0x0 <__vectors_start>, - affinity = {{ - bits = {65535} - }} - }, - irq_data = { - mask = 0, - irq = 18, - hwirq = 27, - common = 0xee803d80, - chip = 0xc0eb0854 , - domain = 0xee808000, - parent_data = 0x0 <__vectors_start>, - chip_data = 0xc0eb0854 - } <... 
trimmed ...> List of commands and functions ------------------------------ diff --git a/Documentation/gpio/drivers-on-gpio.txt b/Documentation/gpio/drivers-on-gpio.txt index 14bf95a13bae..306513251713 100644 --- a/Documentation/gpio/drivers-on-gpio.txt +++ b/Documentation/gpio/drivers-on-gpio.txt @@ -37,15 +37,16 @@ hardware descriptions such as device tree or ACPI: external connector status, such as a headset line for an audio driver or an HDMI connector. It will provide a better userspace sysfs interface than GPIO. -- restart-gpio: drivers/power/gpio-restart.c is used to restart/reboot the - system by pulling a GPIO line and will register a restart handler so +- restart-gpio: drivers/power/reset/gpio-restart.c is used to restart/reboot + the system by pulling a GPIO line and will register a restart handler so userspace can issue the right system call to restart the system. -- poweroff-gpio: drivers/power/gpio-poweroff.c is used to power the system down - by pulling a GPIO line and will register a pm_power_off() callback so that - userspace can issue the right system call to power down the system. +- poweroff-gpio: drivers/power/reset/gpio-poweroff.c is used to power the + system down by pulling a GPIO line and will register a pm_power_off() + callback so that userspace can issue the right system call to power down the + system. -- gpio-gate-clock: drivers/clk/clk-gpio-gate.c is used to control a gated clock +- gpio-gate-clock: drivers/clk/clk-gpio.c is used to control a gated clock (off/on) that uses a GPIO, and integrated with the clock subsystem. - i2c-gpio: drivers/i2c/busses/i2c-gpio.c is used to drive an I2C bus diff --git a/Documentation/hwmon/abituguru b/Documentation/hwmon/abituguru index f1d4fe4c366c..44013d23b3f0 100644 --- a/Documentation/hwmon/abituguru +++ b/Documentation/hwmon/abituguru @@ -24,7 +24,7 @@ Supported chips: AW9D-MAX) (2) 1) For revisions 2 and 3 uGuru's the driver can autodetect the sensortype (Volt or Temp) for bank1 sensors, for revision 1 uGuru's - this doesnot always work. For these uGuru's the autodection can + this does not always work. For these uGuru's the autodetection can be overridden with the bank1_types module param. For all 3 known revison 1 motherboards the correct use of this param is: bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1 diff --git a/Documentation/hwmon/ftsteutates b/Documentation/hwmon/ftsteutates new file mode 100644 index 000000000000..2a1bf69c6a26 --- /dev/null +++ b/Documentation/hwmon/ftsteutates @@ -0,0 +1,23 @@ +Kernel driver ftsteutates +===================== + +Supported chips: + * FTS Teutates + Prefix: 'ftsteutates' + Addresses scanned: I2C 0x73 (7-Bit) + +Author: Thilo Cestonaro + + +Description +----------- +The BMC Teutates is the Eleventh generation of Superior System +monitoring and thermal management solution. It is builds on the basic +functionality of the BMC Theseus and contains several new features and +enhancements. It can monitor up to 4 voltages, 16 temperatures and +8 fans. It also contains an integrated watchdog which is currently +implemented in this driver. 
+ +Specification of the chip can be found here: +ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf +ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf diff --git a/Documentation/hwmon/ina3221 b/Documentation/hwmon/ina3221 new file mode 100644 index 000000000000..0ff74854cb2e --- /dev/null +++ b/Documentation/hwmon/ina3221 @@ -0,0 +1,35 @@ +Kernel driver ina3221 +===================== + +Supported chips: + * Texas Instruments INA3221 + Prefix: 'ina3221' + Addresses: I2C 0x40 - 0x43 + Datasheet: Publicly available at the Texas Instruments website + http://www.ti.com/ + +Author: Andrew F. Davis + +Description +----------- + +The Texas Instruments INA3221 monitors voltage, current, and power on the high +side of up to three D.C. power supplies. The INA3221 monitors both shunt drop +and supply voltage, with programmable conversion times and averaging, current +and power are calculated host-side from these. + +Sysfs entries +------------- + +in[123]_input Bus voltage(mV) channels +curr[123]_input Current(mA) measurement channels +shunt[123]_resistor Shunt resistance(uOhm) channels +curr[123]_crit Critical alert current(mA) setting, activates the + corresponding alarm when the respective current + is above this value +curr[123]_crit_alarm Critical alert current limit exceeded +curr[123]_max Warning alert current(mA) setting, activates the + corresponding alarm when the respective current + average is above this value. +curr[123]_max_alarm Warning alert current limit exceeded +in[456]_input Shunt voltage(uV) for channels 1, 2, and 3 respectively diff --git a/Documentation/hwmon/jc42 b/Documentation/hwmon/jc42 index f7f1830a2566..b4b671f22453 100644 --- a/Documentation/hwmon/jc42 +++ b/Documentation/hwmon/jc42 @@ -18,10 +18,11 @@ Supported chips: * Maxim MAX6604 Datasheets: http://datasheets.maxim-ic.com/en/ds/MAX6604.pdf - * Microchip MCP9804, MCP9805, MCP98242, MCP98243, MCP98244, MCP9843 + * Microchip MCP9804, MCP9805, MCP9808, MCP98242, MCP98243, MCP98244, MCP9843 Datasheets: http://ww1.microchip.com/downloads/en/DeviceDoc/22203C.pdf http://ww1.microchip.com/downloads/en/DeviceDoc/21977b.pdf + http://ww1.microchip.com/downloads/en/DeviceDoc/25095A.pdf http://ww1.microchip.com/downloads/en/DeviceDoc/21996a.pdf http://ww1.microchip.com/downloads/en/DeviceDoc/22153c.pdf http://ww1.microchip.com/downloads/en/DeviceDoc/22327A.pdf diff --git a/Documentation/hwmon/max1668 b/Documentation/hwmon/max1668 index 0616ed9758df..8f9d570dbfec 100644 --- a/Documentation/hwmon/max1668 +++ b/Documentation/hwmon/max1668 @@ -17,7 +17,7 @@ This driver implements support for the Maxim MAX1668, MAX1805 and MAX1989 chips. The three devices are very similar, but the MAX1805 has a reduced feature -set; only two remote temperature inputs vs the four avaible on the other +set; only two remote temperature inputs vs the four available on the other two ICs. 
The driver is able to distinguish between the devices and creates sysfs diff --git a/Documentation/hwmon/sht3x b/Documentation/hwmon/sht3x new file mode 100644 index 000000000000..b0d88184f48e --- /dev/null +++ b/Documentation/hwmon/sht3x @@ -0,0 +1,76 @@ +Kernel driver sht3x +=================== + +Supported chips: + * Sensirion SHT3x-DIS + Prefix: 'sht3x' + Addresses scanned: none + Datasheet: http://www.sensirion.com/fileadmin/user_upload/customers/sensirion/Dokumente/Humidity/Sensirion_Humidity_Datasheet_SHT3x_DIS.pdf + +Author: + David Frey + Pascal Sachs + +Description +----------- + +This driver implements support for the Sensirion SHT3x-DIS chip, a humidity +and temperature sensor. Temperature is measured in degrees celsius, relative +humidity is expressed as a percentage. In the sysfs interface, all values are +scaled by 1000, i.e. the value for 31.5 degrees celsius is 31500. + +The device communicates with the I2C protocol. Sensors can have the I2C +addresses 0x44 or 0x45, depending on the wiring. See +Documentation/i2c/instantiating-devices for methods to instantiate the device. + +There are two options configurable by means of sht3x_platform_data: +1. blocking (pull the I2C clock line down while performing the measurement) or + non-blocking mode. Blocking mode will guarantee the fastest result but + the I2C bus will be busy during that time. By default, non-blocking mode + is used. Make sure clock-stretching works properly on your device if you + want to use blocking mode. +2. high or low accuracy. High accuracy is used by default and using it is + strongly recommended. + +The sht3x sensor supports a single shot mode as well as 5 periodic measure +modes, which can be controlled with the update_interval sysfs interface. +The allowed update_interval in milliseconds are as follows: + * 0 single shot mode + * 2000 0.5 Hz periodic measurement + * 1000 1 Hz periodic measurement + * 500 2 Hz periodic measurement + * 250 4 Hz periodic measurement + * 100 10 Hz periodic measurement + +In the periodic measure mode, the sensor automatically triggers a measurement +with the configured update interval on the chip. When a temperature or humidity +reading exceeds the configured limits, the alert attribute is set to 1 and +the alert pin on the sensor is set to high. +When the temperature and humidity readings move back between the hysteresis +values, the alert bit is set to 0 and the alert pin on the sensor is set to +low. + +sysfs-Interface +--------------- + +temp1_input: temperature input +humidity1_input: humidity input +temp1_max: temperature max value +temp1_max_hyst: temperature hysteresis value for max limit +humidity1_max: humidity max value +humidity1_max_hyst: humidity hysteresis value for max limit +temp1_min: temperature min value +temp1_min_hyst: temperature hysteresis value for min limit +humidity1_min: humidity min value +humidity1_min_hyst: humidity hysteresis value for min limit +temp1_alarm: alarm flag is set to 1 if the temperature is outside the + configured limits. Alarm only works in periodic measure mode +humidity1_alarm: alarm flag is set to 1 if the humidity is outside the + configured limits. Alarm only works in periodic measure mode +heater_enable: heater enable, heating element removes excess humidity from + sensor + 0: turned off + 1: turned on +update_interval: update interval, 0 for single shot, interval in msec + for periodic measurement. 
If the interval is not supported + by the sensor, the next faster interval is chosen diff --git a/Documentation/hwmon/submitting-patches b/Documentation/hwmon/submitting-patches index d201828d202f..57f60307accc 100644 --- a/Documentation/hwmon/submitting-patches +++ b/Documentation/hwmon/submitting-patches @@ -15,10 +15,15 @@ increase the chances of your change being accepted. Documentation/SubmittingPatches Documentation/CodingStyle -* If your patch generates checkpatch warnings, please refrain from explanations - such as "I don't like that coding style". Keep in mind that each unnecessary - warning helps hiding a real problem. If you don't like the kernel coding - style, don't write kernel drivers. +* Please run your patch through 'checkpatch --strict'. There should be no + errors, no warnings, and few if any check messages. If there are any + messages, please be prepared to explain. + +* If your patch generates checkpatch errors, warnings, or check messages, + please refrain from explanations such as "I prefer that coding style". + Keep in mind that each unnecessary message helps hiding a real problem, + and a consistent coding style makes it easier for others to understand + and review the code. * Please test your patch thoroughly. We are not your test group. Sometimes a patch can not or not completely be tested because of missing @@ -61,15 +66,30 @@ increase the chances of your change being accepted. * Make sure that all dependencies are listed in Kconfig. +* Please list include files in alphabetic order. + +* Please align continuation lines with '(' on the previous line. + * Avoid forward declarations if you can. Rearrange the code if necessary. +* Avoid macros to generate groups of sensor attributes. It not only confuses + checkpatch, but also makes it more difficult to review the code. + * Avoid calculations in macros and macro-generated functions. While such macros may save a line or so in the source, it obfuscates the code and makes code review more difficult. It may also result in code which is more complicated than necessary. Use inline functions or just regular functions instead. +* Limit the number of kernel log messages. In general, your driver should not + generate an error message just because a runtime operation failed. Report + errors to user space instead, using an appropriate error code. Keep in mind + that kernel error log messages not only fill up the kernel log, but also are + printed synchronously, most likely with interrupt disabled, often to a serial + console. Excessive logging can seriously affect system performance. + * Use devres functions whenever possible to allocate resources. For rationale and supported functions, please see Documentation/driver-model/devres.txt. + If a function is not supported by devres, consider using devm_add_action(). * If the driver has a detect function, make sure it is silent. Debug messages and messages printed after a successful detection are acceptable, but it @@ -96,8 +116,16 @@ increase the chances of your change being accepted. writing to it might cause a bad misconfiguration. * Make sure there are no race conditions in the probe function. Specifically, - completely initialize your chip first, then create sysfs entries and register - with the hwmon subsystem. + completely initialize your chip and your driver first, then register with + the hwmon subsystem. 
+ +* Use devm_hwmon_device_register_with_groups() or, if your driver needs a remove + function, hwmon_device_register_with_groups() to register your driver with the + hwmon subsystem. Try using devm_add_action() instead of a remove function if + possible. Do not use hwmon_device_register(). + +* Your driver should be buildable as module. If not, please be prepared to + explain why it has to be built into the kernel. * Do not provide support for deprecated sysfs attributes. diff --git a/Documentation/hwmon/tmp401 b/Documentation/hwmon/tmp401 index 711f75e189eb..2d9ca42213cf 100644 --- a/Documentation/hwmon/tmp401 +++ b/Documentation/hwmon/tmp401 @@ -22,6 +22,9 @@ Supported chips: Prefix: 'tmp435' Addresses scanned: I2C 0x48 - 0x4f Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp435.html + * Texas Instruments TMP461 + Prefix: 'tmp461' + Datasheet: http://www.ti.com/product/tmp461 Authors: Hans de Goede @@ -31,8 +34,8 @@ Description ----------- This driver implements support for Texas Instruments TMP401, TMP411, -TMP431, TMP432 and TMP435 chips. These chips implement one or two remote -and one local temperature sensors. Temperature is measured in degrees +TMP431, TMP432, TMP435, and TMP461 chips. These chips implement one or two +remote and one local temperature sensors. Temperature is measured in degrees Celsius. Resolution of the remote sensor is 0.0625 degree. Local sensor resolution can be set to 0.5, 0.25, 0.125 or 0.0625 degree (not supported by the driver so far, so using the default resolution of 0.5 @@ -55,3 +58,10 @@ some additional features. TMP432 is compatible with TMP401 and TMP431. It supports two external temperature sensors. + +TMP461 is compatible with TMP401. It supports offset correction +that is applied to the remote sensor. + +* Sensor offset values are temperature values + + Exported via sysfs attribute tempX_offset diff --git a/Documentation/index.rst b/Documentation/index.rst new file mode 100644 index 000000000000..f92854f01773 --- /dev/null +++ b/Documentation/index.rst @@ -0,0 +1,22 @@ +.. The Linux Kernel documentation master file, created by + sphinx-quickstart on Fri Feb 12 13:51:46 2016. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to The Linux Kernel's documentation! +============================================ + +Nothing for you to see here *yet*. Please move along. + +Contents: + +.. toctree:: + :maxdepth: 2 + + kernel-documentation + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`search` diff --git a/Documentation/ioctl/cdrom.txt b/Documentation/ioctl/cdrom.txt index 59df81c8da2b..a4d62a9d6771 100644 --- a/Documentation/ioctl/cdrom.txt +++ b/Documentation/ioctl/cdrom.txt @@ -340,7 +340,8 @@ CDROMSUBCHNL Read subchannel data (struct cdrom_subchnl) EINVAL format not CDROM_MSF or CDROM_LBA notes: - Format is converted to CDROM_MSF on return + Format is converted to CDROM_MSF or CDROM_LBA + as per user request on return diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 13f888a02a3d..385a5ef41c17 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -47,6 +47,7 @@ This document describes the Linux kernel Makefiles. --- 7.2 genhdr-y --- 7.3 destination-y --- 7.4 generic-y + --- 7.5 generated-y === 8 Kbuild Variables === 9 Makefile language @@ -1319,6 +1320,19 @@ See subsequent chapter for the syntax of the Kbuild file. 
Example: termios.h #include + --- 7.5 generated-y + + If an architecture generates other header files alongside generic-y + wrappers, and not included in genhdr-y, then generated-y specifies + them. + + This prevents them being treated as stale asm-generic wrappers and + removed. + + Example: + #arch/x86/include/asm/Kbuild + generated-y += syscalls_32.h + === 8 Kbuild Variables The top Makefile exports the following variables: diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt index 78f69cdc9b3f..062e3af271b7 100644 --- a/Documentation/kernel-doc-nano-HOWTO.txt +++ b/Documentation/kernel-doc-nano-HOWTO.txt @@ -1,3 +1,6 @@ +NOTE: this document is outdated and will eventually be removed. See +Documentation/kernel-documentation.rst for current information. + kernel-doc nano-HOWTO ===================== diff --git a/Documentation/kernel-documentation.rst b/Documentation/kernel-documentation.rst new file mode 100644 index 000000000000..c4eb5049da39 --- /dev/null +++ b/Documentation/kernel-documentation.rst @@ -0,0 +1,654 @@ +========================== +Linux Kernel Documentation +========================== + +Introduction +============ + +The Linux kernel uses `Sphinx`_ to generate pretty documentation from +`reStructuredText`_ files under ``Documentation``. To build the documentation in +HTML or PDF formats, use ``make htmldocs`` or ``make pdfdocs``. The generated +documentation is placed in ``Documentation/output``. + +.. _Sphinx: http://www.sphinx-doc.org/ +.. _reStructuredText: http://docutils.sourceforge.net/rst.html + +The reStructuredText files may contain directives to include structured +documentation comments, or kernel-doc comments, from source files. Usually these +are used to describe the functions and types and design of the code. The +kernel-doc comments have some special structure and formatting, but beyond that +they are also treated as reStructuredText. + +There is also the deprecated DocBook toolchain to generate documentation from +DocBook XML template files under ``Documentation/DocBook``. The DocBook files +are to be converted to reStructuredText, and the toolchain is slated to be +removed. + +Finally, there are thousands of plain text documentation files scattered around +``Documentation``. Some of these will likely be converted to reStructuredText +over time, but the bulk of them will remain in plain text. + +Sphinx Build +============ + +The usual way to generate the documentation is to run ``make htmldocs`` or +``make pdfdocs``. There are also other formats available, see the documentation +section of ``make help``. The generated documentation is placed in +format-specific subdirectories under ``Documentation/output``. + +To generate documentation, Sphinx (``sphinx-build``) must obviously be +installed. For prettier HTML output, the Read the Docs Sphinx theme +(``sphinx_rtd_theme``) is used if available. For PDF output, ``rst2pdf`` is also +needed. All of these are widely available and packaged in distributions. + +To pass extra options to Sphinx, you can use the ``SPHINXOPTS`` make +variable. For example, use ``make SPHINXOPTS=-v htmldocs`` to get more verbose +output. + +To remove the generated documentation, run ``make cleandocs``. + +Writing Documentation +===================== + +Adding new documentation can be as simple as: + +1. Add a new ``.rst`` file somewhere under ``Documentation``. +2. Refer to it from the Sphinx main `TOC tree`_ in ``Documentation/index.rst``. + +.. 
_TOC tree: http://www.sphinx-doc.org/en/stable/markup/toctree.html + +This is usually good enough for simple documentation (like the one you're +reading right now), but for larger documents it may be advisable to create a +subdirectory (or use an existing one). For example, the graphics subsystem +documentation is under ``Documentation/gpu``, split to several ``.rst`` files, +and has a separate ``index.rst`` (with a ``toctree`` of its own) referenced from +the main index. + +See the documentation for `Sphinx`_ and `reStructuredText`_ on what you can do +with them. In particular, the Sphinx `reStructuredText Primer`_ is a good place +to get started with reStructuredText. There are also some `Sphinx specific +markup constructs`_. + +.. _reStructuredText Primer: http://www.sphinx-doc.org/en/stable/rest.html +.. _Sphinx specific markup constructs: http://www.sphinx-doc.org/en/stable/markup/index.html + +Specific guidelines for the kernel documentation +------------------------------------------------ + +Here are some specific guidelines for the kernel documentation: + +* Please don't go overboard with reStructuredText markup. Keep it simple. + +* Please stick to this order of heading adornments: + + 1. ``=`` with overline for document title:: + + ============== + Document title + ============== + + 2. ``=`` for chapters:: + + Chapters + ======== + + 3. ``-`` for sections:: + + Section + ------- + + 4. ``~`` for subsections:: + + Subsection + ~~~~~~~~~~ + + Although RST doesn't mandate a specific order ("Rather than imposing a fixed + number and order of section title adornment styles, the order enforced will be + the order as encountered."), having the higher levels the same overall makes + it easier to follow the documents. + +list tables +----------- + +We recommend the use of *list table* formats. The *list table* formats are +double-stage lists. Compared to the ASCII-art they might not be as +comfortable for +readers of the text files. Their advantage is that they are easy to +create or modify and that the diff of a modification is much more meaningful, +because it is limited to the modified content. + +The ``flat-table`` is a double-stage list similar to the ``list-table`` with +some additional features: + +* column-span: with the role ``cspan`` a cell can be extended through + additional columns + +* row-span: with the role ``rspan`` a cell can be extended through + additional rows + +* auto span rightmost cell of a table row over the missing cells on the right + side of that table-row. With Option ``:fill-cells:`` this behavior can + changed from *auto span* to *auto fill*, which automatically inserts (empty) + cells instead of spanning the last cell. + +options: + +* ``:header-rows:`` [int] count of header rows +* ``:stub-columns:`` [int] count of stub columns +* ``:widths:`` [[int] [int] ... ] widths of columns +* ``:fill-cells:`` instead of auto-spanning missing cells, insert missing cells + +roles: + +* ``:cspan:`` [int] additional columns (*morecols*) +* ``:rspan:`` [int] additional rows (*morerows*) + +The example below shows how to use this markup. The first level of the staged +list is the *table-row*. In the *table-row* there is only one markup allowed, +the list of the cells in this *table-row*. Exceptions are *comments* ( ``..`` ) +and *targets* (e.g. a ref to ``:ref:`last row ``` / :ref:`last row +`). + +.. code-block:: rst + + .. 
flat-table:: table title + :widths: 2 1 1 3 + + * - head col 1 + - head col 2 + - head col 3 + - head col 4 + + * - column 1 + - field 1.1 + - field 1.2 with autospan + + * - column 2 + - field 2.1 + - :rspan:`1` :cspan:`1` field 2.2 - 3.3 + + * .. _`last row`: + + - column 3 + +Rendered as: + + .. flat-table:: table title + :widths: 2 1 1 3 + + * - head col 1 + - head col 2 + - head col 3 + - head col 4 + + * - column 1 + - field 1.1 + - field 1.2 with autospan + + * - column 2 + - field 2.1 + - :rspan:`1` :cspan:`1` field 2.2 - 3.3 + + * .. _`last row`: + + - column 3 + + +Including kernel-doc comments +============================= + +The Linux kernel source files may contain structured documentation comments, or +kernel-doc comments to describe the functions and types and design of the +code. The documentation comments may be included to any of the reStructuredText +documents using a dedicated kernel-doc Sphinx directive extension. + +The kernel-doc directive is of the format:: + + .. kernel-doc:: source + :option: + +The *source* is the path to a source file, relative to the kernel source +tree. The following directive options are supported: + +export: *[source-pattern ...]* + Include documentation for all functions in *source* that have been exported + using ``EXPORT_SYMBOL`` or ``EXPORT_SYMBOL_GPL`` either in *source* or in any + of the files specified by *source-pattern*. + + The *source-pattern* is useful when the kernel-doc comments have been placed + in header files, while ``EXPORT_SYMBOL`` and ``EXPORT_SYMBOL_GPL`` are next to + the function definitions. + + Examples:: + + .. kernel-doc:: lib/bitmap.c + :export: + + .. kernel-doc:: include/net/mac80211.h + :export: net/mac80211/*.c + +internal: *[source-pattern ...]* + Include documentation for all functions and types in *source* that have + **not** been exported using ``EXPORT_SYMBOL`` or ``EXPORT_SYMBOL_GPL`` either + in *source* or in any of the files specified by *source-pattern*. + + Example:: + + .. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c + :internal: + +doc: *title* + Include documentation for the ``DOC:`` paragraph identified by *title* in + *source*. Spaces are allowed in *title*; do not quote the *title*. The *title* + is only used as an identifier for the paragraph, and is not included in the + output. Please make sure to have an appropriate heading in the enclosing + reStructuredText document. + + Example:: + + .. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c + :doc: High Definition Audio over HDMI and Display Port + +functions: *function* *[...]* + Include documentation for each *function* in *source*. + + Example:: + + .. kernel-doc:: lib/bitmap.c + :functions: bitmap_parselist bitmap_parselist_user + +Without options, the kernel-doc directive includes all documentation comments +from the source file. + +The kernel-doc extension is included in the kernel source tree, at +``Documentation/sphinx/kernel-doc.py``. Internally, it uses the +``scripts/kernel-doc`` script to extract the documentation comments from the +source. + +Writing kernel-doc comments +=========================== + +In order to provide embedded, "C" friendly, easy to maintain, but consistent and +extractable overview, function and type documentation, the Linux kernel has +adopted a consistent style for documentation comments. The format for this +documentation is called the kernel-doc format, described below. 
This style +embeds the documentation within the source files, using a few simple conventions +for adding documentation paragraphs and documenting functions and their +parameters, structures and unions and their members, enumerations, and typedefs. + +.. note:: The kernel-doc format is deceptively similar to gtk-doc or Doxygen, + yet distinctively different, for historical reasons. The kernel source + contains tens of thousands of kernel-doc comments. Please stick to the style + described here. + +The ``scripts/kernel-doc`` script is used by the Sphinx kernel-doc extension in +the documentation build to extract this embedded documentation into the various +HTML, PDF, and other format documents. + +In order to provide good documentation of kernel functions and data structures, +please use the following conventions to format your kernel-doc comments in the +Linux kernel source. + +How to format kernel-doc comments +--------------------------------- + +The opening comment mark ``/**`` is reserved for kernel-doc comments. Only +comments so marked will be considered by the ``kernel-doc`` tool. Use it only +for comment blocks that contain kernel-doc formatted comments. The usual ``*/`` +should be used as the closing comment marker. The lines in between should be +prefixed by `` * `` (space star space). + +The function and type kernel-doc comments should be placed just before the +function or type being described. The overview kernel-doc comments may be freely +placed at the top indentation level. + +Example kernel-doc function comment:: + + /** + * foobar() - Brief description of foobar. + * @arg: Description of argument of foobar. + * + * Longer description of foobar. + * + * Return: Description of return value of foobar. + */ + int foobar(int arg) + +The format is similar for documentation for structures, enums, paragraphs, +etc. See the sections below for details. + +The kernel-doc structure is extracted from the comments, and proper `Sphinx C +Domain`_ function and type descriptions with anchors are generated for them. The +descriptions are filtered for special kernel-doc highlights and +cross-references. See below for details. + +.. _Sphinx C Domain: http://www.sphinx-doc.org/en/stable/domains.html + +Highlights and cross-references +------------------------------- + +The following special patterns are recognized in the kernel-doc comment +descriptive text and converted to proper reStructuredText markup and `Sphinx C +Domain`_ references. + +.. attention:: The below are **only** recognized within kernel-doc comments, + **not** within normal reStructuredText documents. + +``funcname()`` + Function reference. + +``@parameter`` + Name of a function parameter. (No cross-referencing, just formatting.) + +``%CONST`` + Name of a constant. (No cross-referencing, just formatting.) + +``$ENVVAR`` + Name of an environment variable. (No cross-referencing, just formatting.) + +``&struct name`` + Structure reference. + +``&enum name`` + Enum reference. + +``&typedef name`` + Typedef reference. + +``&struct_name->member`` or ``&struct_name.member`` + Structure or union member reference. The cross-reference will be to the struct + or union definition, not the member directly. + +``&name`` + A generic type reference. Prefer using the full reference described above + instead. This is mostly for legacy comments. + +Cross-referencing from reStructuredText +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
highlight:: none + +To cross-reference the functions and types defined in the kernel-doc comments +from reStructuredText documents, please use the `Sphinx C Domain`_ +references. For example:: + + See function :c:func:`foo` and struct/union/enum/typedef :c:type:`bar`. + +While the type reference works with just the type name, without the +struct/union/enum/typedef part in front, you may want to use:: + + See :c:type:`struct foo `. + See :c:type:`union bar `. + See :c:type:`enum baz `. + See :c:type:`typedef meh `. + +This will produce prettier links, and is in line with how kernel-doc does the +cross-references. + +For further details, please refer to the `Sphinx C Domain`_ documentation. + +Function documentation +---------------------- + +.. highlight:: c + +The general format of a function and function-like macro kernel-doc comment is:: + + /** + * function_name() - Brief description of function. + * @arg1: Describe the first argument. + * @arg2: Describe the second argument. + * One can provide multiple line descriptions + * for arguments. + * + * A longer description, with more discussion of the function function_name() + * that might be useful to those using or modifying it. Begins with an + * empty comment line, and may include additional embedded empty + * comment lines. + * + * The longer description may have multiple paragraphs. + * + * Return: Describe the return value of foobar. + * + * The return value description can also have multiple paragraphs, and should + * be placed at the end of the comment block. + */ + +The brief description following the function name may span multiple lines, and +ends with an ``@argument:`` description, a blank comment line, or the end of the +comment block. + +The kernel-doc function comments describe each parameter to the function, in +order, with the ``@argument:`` descriptions. The ``@argument:`` descriptions +must begin on the very next line following the opening brief function +description line, with no intervening blank comment lines. The ``@argument:`` +descriptions may span multiple lines. The continuation lines may contain +indentation. If a function parameter is ``...`` (varargs), it should be listed +in kernel-doc notation as: ``@...:``. + +The return value, if any, should be described in a dedicated section at the end +of the comment starting with "Return:". + +Structure, union, and enumeration documentation +----------------------------------------------- + +The general format of a struct, union, and enum kernel-doc comment is:: + + /** + * struct struct_name - Brief description. + * @member_name: Description of member member_name. + * + * Description of the structure. + */ + +Below, "struct" is used to mean structs, unions and enums, and "member" is used +to mean struct and union members as well as enumerations in an enum. + +The brief description following the structure name may span multiple lines, and +ends with a ``@member:`` description, a blank comment line, or the end of the +comment block. + +The kernel-doc data structure comments describe each member of the structure, in +order, with the ``@member:`` descriptions. The ``@member:`` descriptions must +begin on the very next line following the opening brief function description +line, with no intervening blank comment lines. The ``@member:`` descriptions may +span multiple lines. The continuation lines may contain indentation. 
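As a concrete illustration, a complete kernel-doc comment for a small
structure (the structure and its members here are hypothetical) could look
like::

    /**
     * struct gizmo_config - Configuration for one gizmo instance.
     * @rate_hz: Sampling rate in Hz.
     * @label: Human-readable name shown in diagnostics.
     *
     * A gizmo must be configured before it is registered; note that the
     * brief description above ends at the first @member description.
     */
    struct gizmo_config {
            unsigned int rate_hz;
            const char *label;
    };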
+ +In-line member documentation comments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The structure members may also be documented in-line within the definition:: + + /** + * struct foo - Brief description. + * @foo: The Foo member. + */ + struct foo { + int foo; + /** + * @bar: The Bar member. + */ + int bar; + /** + * @baz: The Baz member. + * + * Here, the member description may contain several paragraphs. + */ + int baz; + } + +Private members +~~~~~~~~~~~~~~~ + +Inside a struct description, you can use the "private:" and "public:" comment +tags. Structure fields that are inside a "private:" area are not listed in the +generated output documentation. The "private:" and "public:" tags must begin +immediately following a ``/*`` comment marker. They may optionally include +comments between the ``:`` and the ending ``*/`` marker. + +Example:: + + /** + * struct my_struct - short description + * @a: first member + * @b: second member + * + * Longer description + */ + struct my_struct { + int a; + int b; + /* private: internal use only */ + int c; + }; + + +Typedef documentation +--------------------- + +The general format of a typedef kernel-doc comment is:: + + /** + * typedef type_name - Brief description. + * + * Description of the type. + */ + +Overview documentation comments +------------------------------- + +To facilitate having source code and comments close together, you can include +kernel-doc documentation blocks that are free-form comments instead of being +kernel-doc for functions, structures, unions, enums, or typedefs. This could be +used for something like a theory of operation for a driver or library code, for +example. + +This is done by using a ``DOC:`` section keyword with a section title. + +The general format of an overview or high-level documentation comment is:: + + /** + * DOC: Theory of Operation + * + * The whizbang foobar is a dilly of a gizmo. It can do whatever you + * want it to do, at any time. It reads your mind. Here's how it works. + * + * foo bar splat + * + * The only drawback to this gizmo is that is can sometimes damage + * hardware, software, or its subject(s). + */ + +The title following ``DOC:`` acts as a heading within the source file, but also +as an identifier for extracting the documentation comment. Thus, the title must +be unique within the file. + +Recommendations +--------------- + +We definitely need kernel-doc formatted documentation for functions that are +exported to loadable modules using ``EXPORT_SYMBOL`` or ``EXPORT_SYMBOL_GPL``. + +We also look to provide kernel-doc formatted documentation for functions +externally visible to other kernel files (not marked "static"). + +We also recommend providing kernel-doc formatted documentation for private (file +"static") routines, for consistency of kernel source code layout. But this is +lower priority and at the discretion of the MAINTAINER of that kernel source +file. + +Data structures visible in kernel include files should also be documented using +kernel-doc formatted comments. + +DocBook XML [DEPRECATED] +======================== + +.. attention:: + + This section describes the deprecated DocBook XML toolchain. Please do not + create new DocBook XML template files. Please consider converting existing + DocBook XML templates files to Sphinx/reStructuredText. + +Converting DocBook to Sphinx +---------------------------- + +.. highlight:: none + +Over time, we expect all of the documents under ``Documentation/DocBook`` to be +converted to Sphinx and reStructuredText. 
For most DocBook XML documents, a good +enough solution is to use the simple ``Documentation/sphinx/tmplcvt`` script, +which uses ``pandoc`` under the hood. For example:: + + $ cd Documentation/sphinx + $ ./tmplcvt ../DocBook/in.tmpl ../out.rst + +Then edit the resulting rst files to fix any remaining issues, and add the +document in the ``toctree`` in ``Documentation/index.rst``. + +Components of the kernel-doc system +----------------------------------- + +Many places in the source tree have extractable documentation in the form of +block comments above functions. The components of this system are: + +- ``scripts/kernel-doc`` + + This is a perl script that hunts for the block comments and can mark them up + directly into reStructuredText, DocBook, man, text, and HTML. (No, not + texinfo.) + +- ``Documentation/DocBook/*.tmpl`` + + These are XML template files, which are normal XML files with special + place-holders for where the extracted documentation should go. + +- ``scripts/docproc.c`` + + This is a program for converting XML template files into XML files. When a + file is referenced it is searched for symbols exported (EXPORT_SYMBOL), to be + able to distinguish between internal and external functions. + + It invokes kernel-doc, giving it the list of functions that are to be + documented. + + Additionally it is used to scan the XML template files to locate all the files + referenced herein. This is used to generate dependency information as used by + make. + +- ``Makefile`` + + The targets 'xmldocs', 'psdocs', 'pdfdocs', and 'htmldocs' are used to build + DocBook XML files, PostScript files, PDF files, and html files in + Documentation/DocBook. The older target 'sgmldocs' is equivalent to 'xmldocs'. + +- ``Documentation/DocBook/Makefile`` + + This is where C files are associated with SGML templates. + +How to use kernel-doc comments in DocBook XML template files +------------------------------------------------------------ + +DocBook XML template files (\*.tmpl) are like normal XML files, except that they +can contain escape sequences where extracted documentation should be inserted. + +``!E`` is replaced by the documentation, in ````, for +functions that are exported using ``EXPORT_SYMBOL``: the function list is +collected from files listed in ``Documentation/DocBook/Makefile``. + +``!I`` is replaced by the documentation for functions that are **not** +exported using ``EXPORT_SYMBOL``. + +``!D`` is used to name additional files to search for functions +exported using ``EXPORT_SYMBOL``. + +``!F `` is replaced by the documentation, in +````, for the functions listed. + +``!P
<filename> <section title>`` is replaced by the contents of the ``DOC:``
+section titled ``<section title>`` from ``<filename>``. Spaces are allowed in
+``<section title>``; do not quote the ``<section title>
``. + +``!C`` is replaced by nothing, but makes the tools check that all DOC: +sections and documented functions, symbols, etc. are used. This makes sense to +use when you use ``!F`` or ``!P`` only and want to verify that all documentation +is included. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 82b42c958d1c..769db8399ac8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -582,6 +582,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. bootmem_debug [KNL] Enable bootmem allocator debug messages. + bert_disable [ACPI] + Disable BERT OS support on buggy BIOSes. + bttv.card= [HW,V4L] bttv (bt848 + bt878 based grabber cards) bttv.radio= Most important insmod options are available as kernel args too. @@ -687,6 +690,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. [SPARC64] tick [X86-64] hpet,tsc + clocksource.arm_arch_timer.evtstrm= + [ARM,ARM64] + Format: + Enable/disable the eventstream feature of the ARM + architected timer so that code using WFE-based polling + loops can be debugged more effectively on production + systems. + clearcpuid=BITNUM [X86] Disable CPUID feature X for the kernel. See arch/x86/include/asm/cpufeatures.h for the valid bit @@ -1185,6 +1196,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Address Range Mirroring feature even if your box doesn't support it. + efivar_ssdt= [EFI; X86] Name of an EFI variable that contains an SSDT + that is to be dynamically loaded by Linux. If there are + multiple variables with the same name but with different + vendor GUIDs, all of them will be loaded. See + Documentation/acpi/ssdt-overlays.txt for details. + + eisa_irq_edge= [PARISC,HW] See header of drivers/parisc/eisa.c. @@ -1803,12 +1821,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. js= [HW,JOY] Analog joystick See Documentation/input/joystick.txt. - kaslr/nokaslr [X86] - Enable/disable kernel and module base offset ASLR - (Address Space Layout Randomization) if built into - the kernel. When CONFIG_HIBERNATION is selected, - kASLR is disabled by default. When kASLR is enabled, - hibernation will be disabled. + nokaslr [KNL] + When CONFIG_RANDOMIZE_BASE is set, this disables + kernel and module base offset ASLR (Address Space + Layout Randomization). keepinitrd [HW,ARM] @@ -2788,8 +2804,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. timer: [X86] Force use of architectural NMI timer mode (see also oprofile.timer for generic hr timer mode) - [s390] Force legacy basic mode sampling - (report cpu_type "timer") oops=panic Always panic on oopses. Default is to just kill the process, but there is a small probability of @@ -3594,6 +3608,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. present during boot. nocompress Don't compress/decompress hibernation images. no Disable hibernation and resume. + protect_image Turn on image protection during restoration + (that will set all pages holding image data + during restoration read-only). retain_initrd [RAM] Keep initrd memory after extraction @@ -3992,8 +4009,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. trace_event=[event-list] [FTRACE] Set and start specified trace events in order - to facilitate early boot debugging. - See also Documentation/trace/events.txt + to facilitate early boot debugging. 
The event-list is a + comma separated list of trace events to enable. See + also Documentation/trace/events.txt trace_options=[option-list] [FTRACE] Enable or disable tracer options at boot. diff --git a/Documentation/md.txt b/Documentation/md.txt index 1a2ada46aaed..d6e2fcf27337 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt @@ -602,7 +602,7 @@ These currently include stripe_cache_size (currently raid5 only) number of entries in the stripe cache. This is writable, but - there are upper and lower limits (32768, 16). Default is 128. + there are upper and lower limits (32768, 17). Default is 256. strip_cache_active (currently raid5 only) number of active entries in the stripe cache preread_bypass_threshold (currently raid5 only) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 147ae8ec836f..a4d0a99de04d 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -806,6 +806,41 @@ out-guess your code. More generally, although READ_ONCE() does force the compiler to actually emit code for a given load, it does not force the compiler to use the results. +In addition, control dependencies apply only to the then-clause and +else-clause of the if-statement in question. In particular, it does +not necessarily apply to code following the if-statement: + + q = READ_ONCE(a); + if (q) { + WRITE_ONCE(b, p); + } else { + WRITE_ONCE(b, r); + } + WRITE_ONCE(c, 1); /* BUG: No ordering against the read from "a". */ + +It is tempting to argue that there in fact is ordering because the +compiler cannot reorder volatile accesses and also cannot reorder +the writes to "b" with the condition. Unfortunately for this line +of reasoning, the compiler might compile the two writes to "b" as +conditional-move instructions, as in this fanciful pseudo-assembly +language: + + ld r1,a + ld r2,p + ld r3,r + cmp r1,$0 + cmov,ne r4,r2 + cmov,eq r4,r3 + st r4,b + st $1,c + +A weakly ordered CPU would have no dependency of any sort between the load +from "a" and the store to "c". The control dependencies would extend +only to the pair of cmov instructions and the store depending on them. +In short, control dependencies apply only to the stores in the then-clause +and else-clause of the if-statement in question (including functions +invoked by those two clauses), not to code following that if-statement. + Finally, control dependencies do -not- provide transitivity. This is demonstrated by two related examples, with the initial values of x and y both being zero: @@ -869,6 +904,12 @@ In summary: atomic{,64}_read() can help to preserve your control dependency. Please see the COMPILER BARRIER section for more information. + (*) Control dependencies apply only to the then-clause and else-clause + of the if-statement containing the control dependency, including + any functions that these two clauses call. Control dependencies + do -not- apply to code following the if-statement containing the + control dependency. + (*) Control dependencies pair normally with other types of barriers. (*) Control dependencies do -not- provide transitivity. 
If you diff --git a/Documentation/mic/mpssd/mpssd.c b/Documentation/mic/mpssd/mpssd.c index 30fb842a976d..49db1def1721 100644 --- a/Documentation/mic/mpssd/mpssd.c +++ b/Documentation/mic/mpssd/mpssd.c @@ -1538,9 +1538,9 @@ set_cmdline(struct mic_info *mic) len = snprintf(buffer, PATH_MAX, "clocksource=tsc highres=off nohz=off "); - len += snprintf(buffer + len, PATH_MAX, + len += snprintf(buffer + len, PATH_MAX - len, "cpufreq_on;corec6_off;pc3_off;pc6_off "); - len += snprintf(buffer + len, PATH_MAX, + len += snprintf(buffer + len, PATH_MAX - len, "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0", mic->id + 1); diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt index d58ff8467953..aa15b9ee2e70 100644 --- a/Documentation/networking/can.txt +++ b/Documentation/networking/can.txt @@ -31,6 +31,7 @@ This file contains 4.2.4 Broadcast Manager message sequence transmission 4.2.5 Broadcast Manager receive filter timers 4.2.6 Broadcast Manager multiplex message receive filter + 4.2.7 Broadcast Manager CAN FD support 4.3 connected transport protocols (SOCK_SEQPACKET) 4.4 unconnected transport protocols (SOCK_DGRAM) @@ -799,7 +800,7 @@ solution for a couple of reasons: } mytxmsg; (..) - mytxmsg.nframes = 4; + mytxmsg.msg_head.nframes = 4; (..) write(s, &mytxmsg, sizeof(mytxmsg)); @@ -852,6 +853,28 @@ solution for a couple of reasons: write(s, &msg, sizeof(msg)); + 4.2.7 Broadcast Manager CAN FD support + + The programming API of the CAN_BCM depends on struct can_frame which is + given as array directly behind the bcm_msg_head structure. To follow this + schema for the CAN FD frames a new flag 'CAN_FD_FRAME' in the bcm_msg_head + flags indicates that the concatenated CAN frame structures behind the + bcm_msg_head are defined as struct canfd_frame. + + struct { + struct bcm_msg_head msg_head; + struct canfd_frame frame[5]; + } msg; + + msg.msg_head.opcode = RX_SETUP; + msg.msg_head.can_id = 0x42; + msg.msg_head.flags = CAN_FD_FRAME; + msg.msg_head.nframes = 5; + (..) + + When using CAN FD frames for multiplex filtering the MUX mask is still + expected in the first 64 bit of the struct canfd_frame data section. + 4.3 connected transport protocols (SOCK_SEQPACKET) 4.4 unconnected transport protocols (SOCK_DGRAM) diff --git a/Documentation/networking/gen_stats.txt b/Documentation/networking/gen_stats.txt index ff630a87b511..179b18ce45ff 100644 --- a/Documentation/networking/gen_stats.txt +++ b/Documentation/networking/gen_stats.txt @@ -21,7 +21,7 @@ struct mystruct { ... }; -Update statistics: +Update statistics, in dequeue() methods only, (while owning qdisc->running) mystruct->tstats.packet++; mystruct->qstats.backlog += skb->pkt_len; diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index f55599c62c9d..4fb51d32fccc 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -7,12 +7,13 @@ nf_conntrack_acct - BOOLEAN Enable connection tracking flow accounting. 64-bit byte and packet counters per flow are added. -nf_conntrack_buckets - INTEGER (read-only) +nf_conntrack_buckets - INTEGER Size of hash table. If not specified as parameter during module loading, the default size is calculated by dividing total memory by 16384 to determine the number of buckets but the hash table will never have fewer than 32 and limited to 16384 buckets. For systems with more than 4GB of memory it will be 65536 buckets. 
+	This sysctl is only writeable in the initial net namespace.

 nf_conntrack_checksum - BOOLEAN
	0 - disabled
diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt
index 9d219d856d46..0235ae69af2a 100644
--- a/Documentation/networking/rds.txt
+++ b/Documentation/networking/rds.txt
@@ -85,7 +85,8 @@ Socket Interface

   bind(fd, &sockaddr_in, ...)
         This binds the socket to a local IP address and port, and a
-        transport.
+        transport, if one has not already been selected via the
+        SO_RDS_TRANSPORT socket option.

   sendmsg(fd, ...)
         Sends a message to the indicated recipient. The kernel will
@@ -146,6 +147,20 @@ Socket Interface
         operation. In this case, it would use RDS_CANCEL_SENT_TO to
         nuke any pending messages.

+  setsockopt(fd, SOL_RDS, SO_RDS_TRANSPORT, (int *)&transport ..)
+  getsockopt(fd, SOL_RDS, SO_RDS_TRANSPORT, (int *)&transport ..)
+        Set or read an integer defining the underlying
+        encapsulating transport to be used for RDS packets on the
+        socket. When setting the option, the integer argument may be
+        one of RDS_TRANS_TCP or RDS_TRANS_IB. When retrieving the
+        value, RDS_TRANS_NONE will be returned on an unbound socket.
+        This socket option may only be set exactly once on the socket,
+        prior to binding it via the bind(2) system call. Attempts to
+        set SO_RDS_TRANSPORT on a socket for which the transport has
+        been previously attached explicitly (by SO_RDS_TRANSPORT) or
+        implicitly (via bind(2)) will return an error of EOPNOTSUPP.
+        An attempt to set SO_RDS_TRANSPORT to RDS_TRANS_NONE will
+        always return EINVAL.

 RDMA for RDS
 ============
@@ -350,4 +365,59 @@ The recv path
	handle CMSGs
	return to application

+Multipath RDS (mprds)
+=====================
+  Mprds is multipathed-RDS, primarily intended for RDS-over-TCP
+  (though the concept can be extended to other transports). The classical
+  implementation of RDS-over-TCP demultiplexes multiple
+  PF_RDS sockets between any 2 endpoints (where endpoint == [IP address,
+  port]) over a single TCP socket between the 2 IP addresses involved. This
+  has the limitation that it ends up funneling multiple RDS flows over a
+  single TCP flow, thus it
+  (a) is upper-bounded by the single-flow bandwidth, and
+  (b) suffers from head-of-line blocking for all the RDS sockets.
+
+  Better throughput (for a fixed small packet size, MTU) can be achieved
+  by having multiple TCP/IP flows per rds/tcp connection, i.e., multipathed
+  RDS (mprds). Each such TCP/IP flow constitutes a path for the rds/tcp
+  connection. RDS sockets will be attached to a path based on some hash
+  (e.g., of local address and RDS port number) and packets for that RDS
+  socket will be sent over the attached path using TCP to segment/reassemble
+  RDS datagrams on that path.
+
+  Multipathed RDS is implemented by splitting the struct rds_connection into
+  a common (to all paths) part, and a per-path struct rds_conn_path. All
+  I/O workqueues and reconnect threads are driven from the rds_conn_path.
+  Transports such as TCP that are multipath capable may then set up a
+  TCP socket per rds_conn_path, and this is managed by the transport via
+  the transport-private cp_transport_data pointer.
+
+  Transports announce themselves as multipath capable by setting the
+  t_mp_capable bit during registration with the rds core module. When the
+  transport is multipath-capable, rds_sendmsg() hashes outgoing traffic
+  across multiple paths. The outgoing hash is computed based on the
+  local address and port that the PF_RDS socket is bound to.
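For reference, a minimal user-space sketch of the SO_RDS_TRANSPORT rules
described above (the address and port are placeholders; AF_RDS, SOL_RDS and
RDS_TRANS_TCP are assumed to be available from <sys/socket.h> and
<linux/rds.h>):

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <arpa/inet.h>
    #include <sys/socket.h>
    #include <linux/rds.h>

    int main(void)
    {
            int trans = RDS_TRANS_TCP;
            struct sockaddr_in sin;
            int fd = socket(AF_RDS, SOCK_SEQPACKET, 0);

            if (fd < 0)
                    return 1;

            /* Allowed exactly once, and only before bind(2). */
            if (setsockopt(fd, SOL_RDS, SO_RDS_TRANSPORT,
                           &trans, sizeof(trans)) < 0)
                    perror("SO_RDS_TRANSPORT");

            memset(&sin, 0, sizeof(sin));
            sin.sin_family = AF_INET;
            sin.sin_addr.s_addr = inet_addr("192.0.2.1"); /* placeholder */
            sin.sin_port = htons(4000);                   /* placeholder */
            if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0)
                    perror("bind");

            close(fd);
            return 0;
    }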
+
+  Additionally, even if the transport is MP capable, we may be
+  peering with some node that does not support mprds, or supports
+  a different number of paths. As a result, the peering nodes need
+  to agree on the number of paths to be used for the connection.
+  This is done by sending out a control packet exchange before the
+  first data packet. The control packet exchange must have completed
+  prior to outgoing hash completion in rds_sendmsg() when the transport
+  is multipath capable.
+
+  The control packet is an RDS ping packet (i.e., a packet to rds dest
+  port 0) with the ping packet having an rds extension header option of
+  type RDS_EXTHDR_NPATHS, length 2 bytes, and the value is the
+  number of paths supported by the sender. The "probe" ping packet will
+  get sent from some reserved port, RDS_FLAG_PROBE_PORT (in <linux/rds.h>).
+  The receiver of a ping from RDS_FLAG_PROBE_PORT will thus immediately
+  be able to compute the min(sender_paths, rcvr_paths). The pong
+  sent in response to a probe-ping should contain the rcvr's npaths
+  when the rcvr is mprds-capable.
+
+  If the rcvr is not mprds-capable, the exthdr in the ping will be
+  ignored. In this case the pong will not have any exthdrs, so the sender
+  of the probe-ping can default to single-path mprds.
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index 671fe3dd56d3..e226f8925c9e 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -285,6 +285,7 @@ Please see the following document:
  o mmc_core.c/mmc.h: Management MAC Counters;
  o stmmac_hwtstamp.c: HW timestamp support for PTP;
  o stmmac_ptp.c: PTP 1588 clock;
+ o stmmac_pcs.h: Physical Coding Sublayer common implementation;
  o dwmac-<soc>.c: these are for the platform glue-logic file; e.g. dwmac-sti.c
    for STMicroelectronics SoCs.
diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt
index 5da679c573d2..755dab856392 100644
--- a/Documentation/networking/vrf.txt
+++ b/Documentation/networking/vrf.txt
@@ -15,9 +15,9 @@ the use of higher priority ip rules (Policy Based Routing, PBR) to take
 precedence over the VRF device rules directing specific traffic as desired.

 In addition, VRF devices allow VRFs to be nested within namespaces. For
-example network namespaces provide separation of network interfaces at L1
-(Layer 1 separation), VLANs on the interfaces within a namespace provide
-L2 separation and then VRF devices provide L3 separation.
+example network namespaces provide separation of network interfaces at the
+device layer, VLANs on the interfaces within a namespace provide L2 separation
+and then VRF devices provide L3 separation.

 Design
 ------
@@ -37,21 +37,22 @@ are then enslaved to a VRF device:
          +------+ +------+

 Packets received on an enslaved device are switched to the VRF device
-using an rx_handler which gives the impression that packets flow through
-the VRF device. Similarly on egress routing rules are used to send packets
-to the VRF device driver before getting sent out the actual interface. This
-allows tcpdump on a VRF device to capture all packets into and out of the
-VRF as a whole.[1] Similarly, netfilter [2] and tc rules can be applied
-using the VRF device to specify rules that apply to the VRF domain as a whole.
+in the IPv4 and IPv6 processing stacks, giving the impression that packets
+flow through the VRF device. Similarly, on egress, routing rules are used to
+send packets to the VRF device driver before getting sent out the actual
+interface.
+This allows tcpdump on a VRF device to capture all packets into
+and out of the VRF as a whole.[1] Similarly, netfilter[2] and tc rules can be
+applied using the VRF device to specify rules that apply to the VRF domain
+as a whole.

 [1] Packets in the forwarded state do not flow through the device, so those
     packets are not seen by tcpdump. Will revisit this limitation in a
     future release.

-[2] Iptables on ingress is limited to NF_INET_PRE_ROUTING only with skb->dev
-    set to real ingress device and egress is limited to NF_INET_POST_ROUTING.
-    Will revisit this limitation in a future release.
-
+[2] Iptables on ingress supports PREROUTING with skb->dev set to the real
+    ingress device and both INPUT and PREROUTING rules with skb->dev set to
+    the VRF device. For egress, POSTROUTING and OUTPUT rules can be written
+    using either the VRF device or the real egress device.

 Setup
 -----
@@ -59,23 +60,33 @@ Setup
 1. VRF device is created with an association to a FIB table.
    e.g., ip link add vrf-blue type vrf table 10
          ip link set dev vrf-blue up

-2. Rules are added that send lookups to the associated FIB table when the
-   iif or oif is the VRF device. e.g.,
+2. An l3mdev FIB rule directs lookups to the table associated with the device.
+   A single l3mdev rule is sufficient for all VRFs. The VRF device adds the
+   l3mdev rule for IPv4 and IPv6 when the first device is created, with a
+   default preference of 1000. Users may delete the rule if desired and add
+   it back with a different priority, or install per-VRF rules.
+
+   Prior to the v4.8 kernel, iif and oif rules are needed for each VRF device:
       ip ru add oif vrf-blue table 10
       ip ru add iif vrf-blue table 10

-   Set the default route for the table (and hence default route for the VRF).
-   e.g., ip route add table 10 prohibit default
+3. Set the default route for the table (and hence default route for the VRF).
+      ip route add table 10 unreachable default

-3. Enslave L3 interfaces to a VRF device.
-   e.g., ip link set dev eth1 master vrf-blue
+4. Enslave L3 interfaces to a VRF device.
+      ip link set dev eth1 master vrf-blue

    Local and connected routes for enslaved devices are automatically moved to
-   the table associated with VRF device. Any additional routes depending on
-   the enslaved device will need to be reinserted following the enslavement.
+   the table associated with the VRF device. Any additional routes depending on
+   the enslaved device are dropped and will need to be reinserted into the VRF
+   FIB table following the enslavement.
+
+   The IPv6 sysctl option keep_addr_on_down can be enabled to keep IPv6 global
+   addresses when VRF enslavement changes.
+      sysctl -w net.ipv6.conf.all.keep_addr_on_down=1

-4. Additional VRF routes are added to associated table.
-   e.g., ip route add table 10 ...
+5. Additional VRF routes are added to the associated table.
+      ip route add table 10 ...


 Applications
@@ -87,39 +98,34 @@ VRF device:

 or to specify the output device using cmsg and IP_PKTINFO.

+TCP services running in the default VRF context (i.e., not bound to any VRF
+device) can work across all VRF domains by enabling the tcp_l3mdev_accept
+sysctl option:
+    sysctl -w net.ipv4.tcp_l3mdev_accept=1

-Limitations
------------
-Index of original ingress interface is not available via cmsg. Will address
-soon.
+netfilter rules on the VRF device can be used to limit access to services
+running in the default VRF context as well.
+
+The default VRF does not have limited scope with respect to port bindings.
+That is, if a process does a wildcard bind to a port in the default VRF it
+owns the port across all VRF domains within the network namespace.
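As noted in the Applications section above, an application scopes a socket to
a VRF by binding it to the VRF device. A minimal sketch (the VRF name "blue"
is illustrative; SO_BINDTODEVICE typically requires CAP_NET_RAW):

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>

    int main(void)
    {
            const char dev[] = "blue";  /* illustrative VRF device name */
            int fd = socket(AF_INET, SOCK_STREAM, 0);

            if (fd < 0)
                    return 1;

            /* Route lookups for this socket now use the VRF's FIB table. */
            if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
                           dev, strlen(dev) + 1) < 0)
                    perror("SO_BINDTODEVICE");

            close(fd);
            return 0;
    }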
################################################################################ Using iproute2 for VRFs ======================= -VRF devices do *not* have to start with 'vrf-'. That is a convention used here -for emphasis of the device type, similar to use of 'br' in bridge names. +iproute2 supports the vrf keyword as of v4.7. For backwards compatibility this +section lists both commands where appropriate -- with the vrf keyword and the +older form without it. 1. Create a VRF To instantiate a VRF device and associate it with a table: $ ip link add dev NAME type vrf table ID - Remember to add the ip rules as well: - $ ip ru add oif NAME table 10 - $ ip ru add iif NAME table 10 - $ ip -6 ru add oif NAME table 10 - $ ip -6 ru add iif NAME table 10 - - Without the rules route lookups are not directed to the table. - - For example: - $ ip link add dev vrf-blue type vrf table 10 - $ ip ru add pref 200 oif vrf-blue table 10 - $ ip ru add pref 200 iif vrf-blue table 10 - $ ip -6 ru add pref 200 oif vrf-blue table 10 - $ ip -6 ru add pref 200 iif vrf-blue table 10 - + As of v4.8 the kernel supports the l3mdev FIB rule where a single rule + covers all VRFs. The l3mdev rule is created for IPv4 and IPv6 on first + device create. 2. List VRFs @@ -129,16 +135,16 @@ for emphasis of the device type, similar to use of 'br' in bridge names. For example: $ ip -d link show type vrf - 11: vrf-mgmt: mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000 + 11: mgmt: mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000 link/ether 72:b3:ba:91:e2:24 brd ff:ff:ff:ff:ff:ff promiscuity 0 vrf table 1 addrgenmode eui64 - 12: vrf-red: mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000 + 12: red: mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000 link/ether b6:6f:6e:f6:da:73 brd ff:ff:ff:ff:ff:ff promiscuity 0 vrf table 10 addrgenmode eui64 - 13: vrf-blue: mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000 + 13: blue: mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000 link/ether 36:62:e8:7d:bb:8c brd ff:ff:ff:ff:ff:ff promiscuity 0 vrf table 66 addrgenmode eui64 - 14: vrf-green: mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000 + 14: green: mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000 link/ether e6:28:b8:63:70:bb brd ff:ff:ff:ff:ff:ff promiscuity 0 vrf table 81 addrgenmode eui64 @@ -146,43 +152,44 @@ for emphasis of the device type, similar to use of 'br' in bridge names. Or in brief output: $ ip -br link show type vrf - vrf-mgmt UP 72:b3:ba:91:e2:24 - vrf-red UP b6:6f:6e:f6:da:73 - vrf-blue UP 36:62:e8:7d:bb:8c - vrf-green UP e6:28:b8:63:70:bb + mgmt UP 72:b3:ba:91:e2:24 + red UP b6:6f:6e:f6:da:73 + blue UP 36:62:e8:7d:bb:8c + green UP e6:28:b8:63:70:bb 3. Assign a Network Interface to a VRF Network interfaces are assigned to a VRF by enslaving the netdevice to a VRF device: - $ ip link set dev NAME master VRF-NAME + $ ip link set dev NAME master NAME On enslavement connected and local routes are automatically moved to the table associated with the VRF device. For example: - $ ip link set dev eth0 master vrf-mgmt + $ ip link set dev eth0 master mgmt 4. 
Show Devices Assigned to a VRF To show devices that have been assigned to a specific VRF add the master option to the ip command: - $ ip link show master VRF-NAME + $ ip link show vrf NAME + $ ip link show master NAME For example: - $ ip link show master vrf-red - 3: eth1: mtu 1500 qdisc pfifo_fast master vrf-red state UP mode DEFAULT group default qlen 1000 + $ ip link show vrf red + 3: eth1: mtu 1500 qdisc pfifo_fast master red state UP mode DEFAULT group default qlen 1000 link/ether 02:00:00:00:02:02 brd ff:ff:ff:ff:ff:ff - 4: eth2: mtu 1500 qdisc pfifo_fast master vrf-red state UP mode DEFAULT group default qlen 1000 + 4: eth2: mtu 1500 qdisc pfifo_fast master red state UP mode DEFAULT group default qlen 1000 link/ether 02:00:00:00:02:03 brd ff:ff:ff:ff:ff:ff - 7: eth5: mtu 1500 qdisc noop master vrf-red state DOWN mode DEFAULT group default qlen 1000 + 7: eth5: mtu 1500 qdisc noop master red state DOWN mode DEFAULT group default qlen 1000 link/ether 02:00:00:00:02:06 brd ff:ff:ff:ff:ff:ff Or using the brief output: - $ ip -br link show master vrf-red + $ ip -br link show vrf red eth1 UP 02:00:00:00:02:02 eth2 UP 02:00:00:00:02:03 eth5 DOWN 02:00:00:00:02:06 @@ -192,26 +199,28 @@ for emphasis of the device type, similar to use of 'br' in bridge names. To list neighbor entries associated with devices enslaved to a VRF device add the master option to the ip command: - $ ip [-6] neigh show master VRF-NAME + $ ip [-6] neigh show vrf NAME + $ ip [-6] neigh show master NAME For example: - $ ip neigh show master vrf-red + $ ip neigh show vrf red 10.2.1.254 dev eth1 lladdr a6:d9:c7:4f:06:23 REACHABLE 10.2.2.254 dev eth2 lladdr 5e:54:01:6a:ee:80 REACHABLE - $ ip -6 neigh show master vrf-red - 2002:1::64 dev eth1 lladdr a6:d9:c7:4f:06:23 REACHABLE + $ ip -6 neigh show vrf red + 2002:1::64 dev eth1 lladdr a6:d9:c7:4f:06:23 REACHABLE 6. Show Addresses for a VRF To show addresses for interfaces associated with a VRF add the master option to the ip command: - $ ip addr show master VRF-NAME + $ ip addr show vrf NAME + $ ip addr show master NAME For example: - $ ip addr show master vrf-red - 3: eth1: mtu 1500 qdisc pfifo_fast master vrf-red state UP group default qlen 1000 + $ ip addr show vrf red + 3: eth1: mtu 1500 qdisc pfifo_fast master red state UP group default qlen 1000 link/ether 02:00:00:00:02:02 brd ff:ff:ff:ff:ff:ff inet 10.2.1.2/24 brd 10.2.1.255 scope global eth1 valid_lft forever preferred_lft forever @@ -219,7 +228,7 @@ for emphasis of the device type, similar to use of 'br' in bridge names. valid_lft forever preferred_lft forever inet6 fe80::ff:fe00:202/64 scope link valid_lft forever preferred_lft forever - 4: eth2: mtu 1500 qdisc pfifo_fast master vrf-red state UP group default qlen 1000 + 4: eth2: mtu 1500 qdisc pfifo_fast master red state UP group default qlen 1000 link/ether 02:00:00:00:02:03 brd ff:ff:ff:ff:ff:ff inet 10.2.2.2/24 brd 10.2.2.255 scope global eth2 valid_lft forever preferred_lft forever @@ -227,11 +236,11 @@ for emphasis of the device type, similar to use of 'br' in bridge names. 
valid_lft forever preferred_lft forever inet6 fe80::ff:fe00:203/64 scope link valid_lft forever preferred_lft forever - 7: eth5: mtu 1500 qdisc noop master vrf-red state DOWN group default qlen 1000 + 7: eth5: mtu 1500 qdisc noop master red state DOWN group default qlen 1000 link/ether 02:00:00:00:02:06 brd ff:ff:ff:ff:ff:ff Or in brief format: - $ ip -br addr show master vrf-red + $ ip -br addr show vrf red eth1 UP 10.2.1.2/24 2002:1::2/120 fe80::ff:fe00:202/64 eth2 UP 10.2.2.2/24 2002:2::2/120 fe80::ff:fe00:203/64 eth5 DOWN @@ -241,10 +250,11 @@ for emphasis of the device type, similar to use of 'br' in bridge names. To show routes for a VRF use the ip command to display the table associated with the VRF device: + $ ip [-6] route show vrf NAME $ ip [-6] route show table ID For example: - $ ip route show table vrf-red + $ ip route show vrf red prohibit default broadcast 10.2.1.0 dev eth1 proto kernel scope link src 10.2.1.2 10.2.1.0/24 dev eth1 proto kernel scope link src 10.2.1.2 @@ -255,7 +265,7 @@ for emphasis of the device type, similar to use of 'br' in bridge names. local 10.2.2.2 dev eth2 proto kernel scope host src 10.2.2.2 broadcast 10.2.2.255 dev eth2 proto kernel scope link src 10.2.2.2 - $ ip -6 route show table vrf-red + $ ip -6 route show vrf red local 2002:1:: dev lo proto none metric 0 pref medium local 2002:1::2 dev lo proto none metric 0 pref medium 2002:1::/120 dev eth1 proto kernel metric 256 pref medium @@ -268,23 +278,24 @@ for emphasis of the device type, similar to use of 'br' in bridge names. local fe80::ff:fe00:203 dev lo proto none metric 0 pref medium fe80::/64 dev eth1 proto kernel metric 256 pref medium fe80::/64 dev eth2 proto kernel metric 256 pref medium - ff00::/8 dev vrf-red metric 256 pref medium + ff00::/8 dev red metric 256 pref medium ff00::/8 dev eth1 metric 256 pref medium ff00::/8 dev eth2 metric 256 pref medium 8. Route Lookup for a VRF - A test route lookup can be done for a VRF by adding the oif option to ip: - $ ip [-6] route get oif VRF-NAME ADDRESS + A test route lookup can be done for a VRF: + $ ip [-6] route get vrf NAME ADDRESS + $ ip [-6] route get oif NAME ADDRESS For example: - $ ip route get 10.2.1.40 oif vrf-red - 10.2.1.40 dev eth1 table vrf-red src 10.2.1.2 + $ ip route get 10.2.1.40 vrf red + 10.2.1.40 dev eth1 table red src 10.2.1.2 cache - $ ip -6 route get 2002:1::32 oif vrf-red - 2002:1::32 from :: dev eth1 table vrf-red proto kernel src 2002:1::2 metric 256 pref medium + $ ip -6 route get 2002:1::32 vrf red + 2002:1::32 from :: dev eth1 table red proto kernel src 2002:1::2 metric 256 pref medium 9. Removing Network Interface from a VRF @@ -303,46 +314,40 @@ for emphasis of the device type, similar to use of 'br' in bridge names. Commands used in this example: -cat >> /etc/iproute2/rt_tables <<EOF +cat >> /etc/iproute2/rt_tables.d/vrf.conf <<EOF diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt index 3da163383c93..61329fd62e89 100644 --- a/Documentation/s390/s390dbf.txt +++ b/Documentation/s390/s390dbf.txt @@ -405,7 +405,7 @@ Example: > ls /sys/kernel/debug/s390dbf/dasd flush hex_ascii level pages raw -> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort +1 +> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s 00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | .... 00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE 00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | ....
diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt index 8638f61c8c9d..37eca00796ee 100644 --- a/Documentation/scsi/scsi_eh.txt +++ b/Documentation/scsi/scsi_eh.txt @@ -263,19 +263,23 @@ scmd->allowed. 3. scmd recovered ACTION: scsi_eh_finish_cmd() is invoked to EH-finish scmd - - shost->host_failed-- - clear scmd->eh_eflags - scsi_setup_cmd_retry() - move from local eh_work_q to local eh_done_q LOCKING: none + CONCURRENCY: at most one thread per separate eh_work_q to + keep queue manipulation lockless 4. EH completes ACTION: scsi_eh_flush_done_q() retries scmds or notifies upper - layer of failure. + layer of failure. May be called concurrently but must have + no more than one thread per separate eh_work_q to + manipulate the queue locklessly - scmd is removed from eh_done_q and scmd->eh_entry is cleared - if retry is necessary, scmd is requeued using scsi_queue_insert() - otherwise, scsi_finish_command() is invoked for scmd + - zero shost->host_failed LOCKING: queue or finish function performs appropriate locking diff --git a/Documentation/security/self-protection.txt b/Documentation/security/self-protection.txt index babd6378ec05..3010576c9fca 100644 --- a/Documentation/security/self-protection.txt +++ b/Documentation/security/self-protection.txt @@ -183,8 +183,9 @@ provide meaningful defenses. ### Canaries, blinding, and other secrets It should be noted that things like the stack canary discussed earlier -are technically statistical defenses, since they rely on a (leakable) -secret value. +are technically statistical defenses, since they rely on a secret value, +and such values may become discoverable through an information exposure +flaw. Blinding literal values for things like JITs, where the executable contents may be partially under the control of userspace, need a similar @@ -199,8 +200,8 @@ working?) in order to maximize their success. Since the location of kernel memory is almost always instrumental in mounting a successful attack, making the location non-deterministic raises the difficulty of an exploit. (Note that this in turn makes -the value of leaks higher, since they may be used to discover desired -memory locations.) +the value of information exposures higher, since they may be used to +discover desired memory locations.) #### Text and module base @@ -222,14 +223,21 @@ become more difficult to locate. Much of the kernel's dynamic memory (e.g. kmalloc, vmalloc, etc) ends up being relatively deterministic in layout due to the order of early-boot initializations. If the base address of these areas is not the same -between boots, targeting them is frustrated, requiring a leak specific -to the region. +between boots, targeting them is frustrated, requiring an information +exposure specific to the region. + +#### Structure layout + +By performing a per-build randomization of the layout of sensitive +structures, attacks must either be tuned to known kernel builds or expose +enough kernel memory to determine structure layouts before manipulating +them. -## Preventing Leaks +## Preventing Information Exposures Since the locations of sensitive structures are the primary target for -attacks, it is important to defend against leaks of both kernel memory +attacks, it is important to defend against exposure of both kernel memory addresses and kernel memory contents (since they may contain kernel addresses or other sensitive things like canary values). @@ -250,8 +258,8 @@ sure structure holes are cleared.
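One common pattern for clearing structure holes: compilers do not initialize padding bytes, so the whole object should be zeroed before the fields are filled in. A hedged userspace sketch of the idea (the struct and names are illustrative; in kernel code the same memset() would sit in front of a copy_to_user()):

    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>

    struct report {
            uint32_t id;
            uint8_t  flags;    /* compilers typically pad bytes after this */
            uint64_t addr;
    };

    static void send_report(int fd, uint32_t id, uint64_t addr)
    {
            struct report r;

            memset(&r, 0, sizeof(r));  /* clear structure holes first */
            r.id = id;
            r.flags = 1;
            r.addr = addr;
            write(fd, &r, sizeof(r));  /* no stale stack bytes escape */
    }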
When releasing memory, it is best to poison the contents (clear stack on syscall return, wipe heap memory on a free), to avoid reuse attacks that rely on the old contents of memory. This frustrates many uninitialized -variable attacks, stack info leaks, heap info leaks, and use-after-free -attacks. +variable attacks, stack content exposures, heap content exposures, and +use-after-free attacks. ### Destination tracking diff --git a/Documentation/sphinx/convert_template.sed b/Documentation/sphinx/convert_template.sed new file mode 100644 index 000000000000..c1503fcca4ec --- /dev/null +++ b/Documentation/sphinx/convert_template.sed @@ -0,0 +1,18 @@ +# +# Pandoc doesn't grok <function> or <structname>, so convert them +# ahead of time. +# +# Use the following escapes to pass through pandoc: +# $bq = "`" +# $lt = "<" +# $gt = ">" +# +s%<function>\([^<(]\+\)()</function>%:c:func:$bq\1()$bq%g +s%<function>\([^<(]\+\)</function>%:c:func:$bq\1()$bq%g +s%<structname>struct *\([^<]\+\)</structname>%:c:type:$bqstruct \1 $lt\1$gt$bq%g +s%struct <structname>\([^<]\+\)</structname>%:c:type:$bqstruct \1 $lt\1$gt$bq%g +s%<structname>\([^<]\+\)</structname>%:c:type:$bqstruct \1 $lt\1$gt$bq%g +# +# Wrap docproc directives in para and code blocks. +# +s%^\(!.*\)$%DOCPROC: \1% diff --git a/Documentation/sphinx/kernel-doc.py b/Documentation/sphinx/kernel-doc.py new file mode 100644 index 000000000000..f6920c0af6ee --- /dev/null +++ b/Documentation/sphinx/kernel-doc.py @@ -0,0 +1,141 @@ +# coding=utf-8 +# +# Copyright © 2016 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +# Authors: +# Jani Nikula +# +# Please make sure this works on both python2 and python3.
+# + +import os +import subprocess +import sys +import re +import glob + +from docutils import nodes, statemachine +from docutils.statemachine import ViewList +from docutils.parsers.rst import directives +from sphinx.util.compat import Directive +from sphinx.ext.autodoc import AutodocReporter + +class KernelDocDirective(Directive): + """Extract kernel-doc comments from the specified file""" + required_arguments = 1 + optional_arguments = 4 + option_spec = { + 'doc': directives.unchanged_required, + 'functions': directives.unchanged_required, + 'export': directives.unchanged, + 'internal': directives.unchanged, + } + has_content = False + + def run(self): + env = self.state.document.settings.env + cmd = [env.config.kerneldoc_bin, '-rst', '-enable-lineno'] + + filename = env.config.kerneldoc_srctree + '/' + self.arguments[0] + export_file_patterns = [] + + # Tell sphinx of the dependency + env.note_dependency(os.path.abspath(filename)) + + tab_width = self.options.get('tab-width', self.state.document.settings.tab_width) + + # FIXME: make this nicer and more robust against errors + if 'export' in self.options: + cmd += ['-export'] + export_file_patterns = str(self.options.get('export')).split() + elif 'internal' in self.options: + cmd += ['-internal'] + export_file_patterns = str(self.options.get('internal')).split() + elif 'doc' in self.options: + cmd += ['-function', str(self.options.get('doc'))] + elif 'functions' in self.options: + for f in str(self.options.get('functions')).split(): + cmd += ['-function', f] + + for pattern in export_file_patterns: + for f in glob.glob(env.config.kerneldoc_srctree + '/' + pattern): + env.note_dependency(os.path.abspath(f)) + cmd += ['-export-file', f] + + cmd += [filename] + + try: + env.app.verbose('calling kernel-doc \'%s\'' % (" ".join(cmd))) + + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) + out, err = p.communicate() + + # python2 needs conversion to unicode. + # python3 with universal_newlines=True returns strings.
+ if sys.version_info.major < 3: + out, err = unicode(out, 'utf-8'), unicode(err, 'utf-8') + + if p.returncode != 0: + sys.stderr.write(err) + + env.app.warn('kernel-doc \'%s\' failed with return code %d' % (" ".join(cmd), p.returncode)) + return [nodes.error(None, nodes.paragraph(text = "kernel-doc missing"))] + elif env.config.kerneldoc_verbosity > 0: + sys.stderr.write(err) + + lines = statemachine.string2lines(out, tab_width, convert_whitespace=True) + result = ViewList() + + lineoffset = 0 + line_regex = re.compile("^#define LINENO ([0-9]+)$") + for line in lines: + match = line_regex.search(line) + if match: + # sphinx counts lines from 0 + lineoffset = int(match.group(1)) - 1 + # we must eat our comments since they upset the markup + else: + result.append(line, filename, lineoffset) + lineoffset += 1 + + node = nodes.section() + buf = self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter + self.state.memo.reporter = AutodocReporter(result, self.state.memo.reporter) + self.state.memo.title_styles, self.state.memo.section_level = [], 0 + try: + self.state.nested_parse(result, 0, node, match_titles=1) + finally: + self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter = buf + + return node.children + + except Exception as e: # pylint: disable=W0703 + env.app.warn('kernel-doc \'%s\' processing failed with: %s' % + (" ".join(cmd), str(e))) + return [nodes.error(None, nodes.paragraph(text = "kernel-doc missing"))] + +def setup(app): + app.add_config_value('kerneldoc_bin', None, 'env') + app.add_config_value('kerneldoc_srctree', None, 'env') + app.add_config_value('kerneldoc_verbosity', 1, 'env') + + app.add_directive('kernel-doc', KernelDocDirective) diff --git a/Documentation/sphinx/post_convert.sed b/Documentation/sphinx/post_convert.sed new file mode 100644 index 000000000000..392770bac53b --- /dev/null +++ b/Documentation/sphinx/post_convert.sed @@ -0,0 +1,23 @@ +# +# Unescape. +# +s/$bq/`/g +s/$lt/</g +s/$gt/>/g +# +# pandoc thinks that "_" needs to be escaped. Remove the extra +# backslashes. +# +s/\\_/_/g +# +# Unwrap docproc directives. +# +s/^``DOCPROC: !E\(.*\)``$/.. kernel-doc:: \1\n :export:/ +s/^``DOCPROC: !I\(.*\)``$/.. kernel-doc:: \1\n :internal:/ +s/^``DOCPROC: !F\([^ ]*\) \(.*\)``$/.. kernel-doc:: \1\n :functions: \2/ +s/^``DOCPROC: !P\([^ ]*\) \(.*\)``$/.. kernel-doc:: \1\n :doc: \2/ +s/^``DOCPROC: \(!.*\)``$/.. WARNING: DOCPROC directive not supported: \1/ +# +# Trim trailing whitespace. +# +s/[[:space:]]*$// diff --git a/Documentation/sphinx/rstFlatTable.py b/Documentation/sphinx/rstFlatTable.py new file mode 100644 index 000000000000..26db852e3c74 --- /dev/null +++ b/Documentation/sphinx/rstFlatTable.py @@ -0,0 +1,365 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8; mode: python -*- +# pylint: disable=C0330, R0903, R0912 + +u""" + flat-table + ~~~~~~~~~~ + + Implementation of the ``flat-table`` reST-directive. + + :copyright: Copyright (C) 2016 Markus Heiser + :license: GPL Version 2, June 1991 see linux/COPYING for details. + + The ``flat-table`` (:py:class:`FlatTable`) is a double-stage list similar to + the ``list-table`` with some additional features: + + * *column-span*: with the role ``cspan`` a cell can be extended through + additional columns + + * *row-span*: with the role ``rspan`` a cell can be extended through + additional rows + + * *auto span* rightmost cell of a table row over the missing cells on the + right side of that table-row.
With Option ``:fill-cells:`` this behavior + can be changed from *auto span* to *auto fill*, which automatically inserts + (empty) cells instead of spanning the last cell. + + Options: + + * header-rows: [int] count of header rows + * stub-columns: [int] count of stub columns + * widths: [[int] [int] ... ] widths of columns + * fill-cells: instead of autospanning missing cells, insert missing cells + + roles: + + * cspan: [int] additional columns (*morecols*) + * rspan: [int] additional rows (*morerows*) +""" + +# ============================================================================== +# imports +# ============================================================================== + +import sys + +from docutils import nodes +from docutils.parsers.rst import directives, roles +from docutils.parsers.rst.directives.tables import Table +from docutils.utils import SystemMessagePropagation + +# ============================================================================== +# common globals +# ============================================================================== + +# The version numbering follows numbering of the specification +# (Documentation/books/kernel-doc-HOWTO). +__version__ = '1.0' + +PY3 = sys.version_info[0] == 3 +PY2 = sys.version_info[0] == 2 + +if PY3: + # pylint: disable=C0103, W0622 + unicode = str + basestring = str + +# ============================================================================== +def setup(app): +# ============================================================================== + + app.add_directive("flat-table", FlatTable) + roles.register_local_role('cspan', c_span) + roles.register_local_role('rspan', r_span) + +# ============================================================================== +def c_span(name, rawtext, text, lineno, inliner, options=None, content=None): +# ============================================================================== + # pylint: disable=W0613 + + options = options if options is not None else {} + content = content if content is not None else [] + nodelist = [colSpan(span=int(text))] + msglist = [] + return nodelist, msglist + +# ============================================================================== +def r_span(name, rawtext, text, lineno, inliner, options=None, content=None): +# ============================================================================== + # pylint: disable=W0613 + + options = options if options is not None else {} + content = content if content is not None else [] + nodelist = [rowSpan(span=int(text))] + msglist = [] + return nodelist, msglist + + +# ============================================================================== +class rowSpan(nodes.General, nodes.Element): pass # pylint: disable=C0103,C0321 +class colSpan(nodes.General, nodes.Element): pass # pylint: disable=C0103,C0321 +# ============================================================================== + +# ============================================================================== +class FlatTable(Table): +# ============================================================================== + + u"""FlatTable (``flat-table``) directive""" + + option_spec = { + 'name': directives.unchanged + , 'class': directives.class_option + , 'header-rows': directives.nonnegative_int + , 'stub-columns': directives.nonnegative_int + , 'widths': directives.positive_int_list + , 'fill-cells' : directives.flag } + + def run(self): + + if not self.content: + error = self.state_machine.reporter.error( + 'The "%s" directive is empty; content required.'
% self.name, + nodes.literal_block(self.block_text, self.block_text), + line=self.lineno) + return [error] + + title, messages = self.make_title() + node = nodes.Element() # anonymous container for parsing + self.state.nested_parse(self.content, self.content_offset, node) + + tableBuilder = ListTableBuilder(self) + tableBuilder.parseFlatTableNode(node) + tableNode = tableBuilder.buildTableNode() + # SDK.CONSOLE() # print --> tableNode.asdom().toprettyxml() + if title: + tableNode.insert(0, title) + return [tableNode] + messages + + +# ============================================================================== +class ListTableBuilder(object): +# ============================================================================== + + u"""Builds a table from a double-stage list""" + + def __init__(self, directive): + self.directive = directive + self.rows = [] + self.max_cols = 0 + + def buildTableNode(self): + + colwidths = self.directive.get_column_widths(self.max_cols) + stub_columns = self.directive.options.get('stub-columns', 0) + header_rows = self.directive.options.get('header-rows', 0) + + table = nodes.table() + tgroup = nodes.tgroup(cols=len(colwidths)) + table += tgroup + + + for colwidth in colwidths: + colspec = nodes.colspec(colwidth=colwidth) + # FIXME: It seems that the stub method only works well in the + # absence of rowspan (observed by the html builder, the docutils-xml + # build seems OK). This is not extraordinary, because there exists + # no table directive (except *this* flat-table) which allows + # rowspan and stubs to coexist (there was no use-case + # before flat-table). This should be reviewed (later). + if stub_columns: + colspec.attributes['stub'] = 1 + stub_columns -= 1 + tgroup += colspec + stub_columns = self.directive.options.get('stub-columns', 0) + + if header_rows: + thead = nodes.thead() + tgroup += thead + for row in self.rows[:header_rows]: + thead += self.buildTableRowNode(row) + + tbody = nodes.tbody() + tgroup += tbody + + for row in self.rows[header_rows:]: + tbody += self.buildTableRowNode(row) + return table + + def buildTableRowNode(self, row_data, classes=None): + classes = [] if classes is None else classes + row = nodes.row() + for cell in row_data: + if cell is None: + continue + cspan, rspan, cellElements = cell + + attributes = {"classes" : classes} + if rspan: + attributes['morerows'] = rspan + if cspan: + attributes['morecols'] = cspan + entry = nodes.entry(**attributes) + entry.extend(cellElements) + row += entry + return row + + def raiseError(self, msg): + error = self.directive.state_machine.reporter.error( + msg + , nodes.literal_block(self.directive.block_text + , self.directive.block_text) + , line = self.directive.lineno ) + raise SystemMessagePropagation(error) + + def parseFlatTableNode(self, node): + u"""parses the node from a :py:class:`FlatTable` directive's body""" + + if len(node) != 1 or not isinstance(node[0], nodes.bullet_list): + self.raiseError( + 'Error parsing content block for the "%s" directive: ' + 'exactly one bullet list expected.' % self.directive.name ) + + for rowNum, rowItem in enumerate(node[0]): + row = self.parseRowItem(rowItem, rowNum) + self.rows.append(row) + self.roundOffTableDefinition() + + def roundOffTableDefinition(self): + u"""Round off the table definition. + + This method rounds off the table definition in :py:member:`rows`. + + * This method inserts the needed ``None`` values for the missing cells + arising from spanning cells over rows and/or columns.
+ + * recount the :py:member:`max_cols` + + * Autospan or fill (option ``fill-cells``) missing cells on the right + side of the table-row + """ + + y = 0 + while y < len(self.rows): + x = 0 + + while x < len(self.rows[y]): + cell = self.rows[y][x] + if cell is None: + x += 1 + continue + cspan, rspan = cell[:2] + # handle colspan in current row + for c in range(cspan): + try: + self.rows[y].insert(x+c+1, None) + except: # pylint: disable=W0702 + # the user sets ambiguous rowspans + pass # SDK.CONSOLE() + # handle colspan in spanned rows + for r in range(rspan): + for c in range(cspan + 1): + try: + self.rows[y+r+1].insert(x+c, None) + except: # pylint: disable=W0702 + # the user sets ambiguous rowspans + pass # SDK.CONSOLE() + x += 1 + y += 1 + + # Insert the missing cells on the right side. For this, first + # re-calculate the max columns. + + for row in self.rows: + if self.max_cols < len(row): + self.max_cols = len(row) + + # fill with empty cells or cellspan? + + fill_cells = False + if 'fill-cells' in self.directive.options: + fill_cells = True + + for row in self.rows: + x = self.max_cols - len(row) + if x and not fill_cells: + if row[-1] is None: + row.append( ( x - 1, 0, []) ) + else: + cspan, rspan, content = row[-1] + row[-1] = (cspan + x, rspan, content) + elif x and fill_cells: + for i in range(x): + row.append( (0, 0, nodes.comment()) ) + + def pprint(self): + # for debugging + retVal = "[ " + for row in self.rows: + retVal += "[ " + for col in row: + if col is None: + retVal += ('%r' % col) + retVal += "\n , " + else: + content = col[2][0].astext() + if len (content) > 30: + content = content[:30] + "..." + retVal += ('(cspan=%s, rspan=%s, %r)' + % (col[0], col[1], content)) + retVal += "]\n , " + retVal = retVal[:-2] + retVal += "]\n , " + retVal = retVal[:-2] + return retVal + "]" + + def parseRowItem(self, rowItem, rowNum): + row = [] + childNo = 0 + error = False + cell = None + target = None + + for child in rowItem: + if (isinstance(child , nodes.comment) + or isinstance(child, nodes.system_message)): + pass + elif isinstance(child , nodes.target): + target = child + elif isinstance(child, nodes.bullet_list): + childNo += 1 + cell = child + else: + error = True + break + + if childNo != 1 or error: + self.raiseError( + 'Error parsing content block for the "%s" directive: ' + 'two-level bullet list expected, but row %s does not ' + 'contain a second-level bullet list.' + % (self.directive.name, rowNum + 1)) + + for cellItem in cell: + cspan, rspan, cellElements = self.parseCellItem(cellItem) + if target is not None: + cellElements.insert(0, target) + row.append( (cspan, rspan, cellElements) ) + return row + + def parseCellItem(self, cellItem): + # search and remove cspan, rspan colspec from the first element in + # this listItem (field). + cspan = rspan = 0 + if not len(cellItem): + return cspan, rspan, [] + for elem in cellItem[0]: + if isinstance(elem, colSpan): + cspan = elem.get("span") + elem.parent.remove(elem) + continue + if isinstance(elem, rowSpan): + rspan = elem.get("span") + elem.parent.remove(elem) + continue + return cspan, rspan, cellItem[:] diff --git a/Documentation/sphinx/tmplcvt b/Documentation/sphinx/tmplcvt new file mode 100755 index 000000000000..909a73065e0a --- /dev/null +++ b/Documentation/sphinx/tmplcvt @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Convert a template file into something like RST +# +# fix +# feed to pandoc +# fix \_ +# title line? 
+# + +in=$1 +rst=$2 +tmp=$rst.tmp + +cp $in $tmp +sed --in-place -f convert_template.sed $tmp +pandoc -s -S -f docbook -t rst -o $rst $tmp +sed --in-place -f post_convert.sed $rst +rm $tmp diff --git a/Documentation/sync_file.txt b/Documentation/sync_file.txt index eaf8297dbca2..e8e2ebafe5fa 100644 --- a/Documentation/sync_file.txt +++ b/Documentation/sync_file.txt @@ -6,8 +6,8 @@ This document serves as a guide for device drivers writers on what the sync_file API is, and how drivers can support it. Sync file is the carrier of -the fences(struct fence) that needs to synchronized between drivers or across -process boundaries. +the fences(struct fence) that are needed to synchronize between drivers or +across process boundaries. The sync_file API is meant to be used to send and receive fence information to/from userspace. It enables userspace to do explicit fencing, where instead @@ -32,7 +32,7 @@ in-fences and out-fences Sync files can go either to or from userspace. When a sync_file is sent from the driver to userspace we call the fences it contains 'out-fences'. They are related to a buffer that the driver is processing or is going to process, so -the driver an create out-fence to be able to notify, through fence_signal(), +the driver creates an out-fence to be able to notify, through fence_signal(), when it has finished using (or processing) that buffer. Out-fences are fences that the driver creates. diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index a3683ce2a2f3..33204604de6c 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -58,6 +58,7 @@ show up in /proc/sys/kernel: - panic_on_stackoverflow - panic_on_unrecovered_nmi - panic_on_warn +- panic_on_rcu_stall - perf_cpu_time_max_percent - perf_event_paranoid - perf_event_max_stack @@ -618,6 +619,17 @@ a kernel rebuild when attempting to kdump at the location of a WARN(). ============================================================== +panic_on_rcu_stall: + +When set to 1, calls panic() after RCU stall detection messages. This +is useful to define the root cause of RCU stalls using a vmcore. + +0: do not panic() when RCU stall takes place, default behavior. + +1: panic() after printing RCU stall messages. + +============================================================== + perf_cpu_time_max_percent: Hints to the kernel how much CPU time it should be allowed to diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 720355cbdf45..95ccbe6d79ce 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -61,6 +61,7 @@ Currently, these files are in /proc/sys/vm: - swappiness - user_reserve_kbytes - vfs_cache_pressure +- watermark_scale_factor - zone_reclaim_mode ============================================================== diff --git a/Documentation/thermal/intel_powerclamp.txt b/Documentation/thermal/intel_powerclamp.txt index 332de4a39b5a..60073dc9f748 100644 --- a/Documentation/thermal/intel_powerclamp.txt +++ b/Documentation/thermal/intel_powerclamp.txt @@ -121,7 +121,7 @@ time is considered largely as a non-causal system where its behavior cannot be based on the past or current input. Therefore, the intel_powerclamp driver attempts to enforce the desired idle time instantly as given input (target idle ratio). After injection, -powerclamp moniors the actual idle for a given time window and adjust +powerclamp monitors the actual idle for a given time window and adjust the next injection accordingly to avoid over/under correction. 
When used in a causal control system, such as a temperature control, diff --git a/Documentation/usb/gadget_multi.txt b/Documentation/usb/gadget_multi.txt index 5faf514047e9..b3146dd7aa43 100644 --- a/Documentation/usb/gadget_multi.txt +++ b/Documentation/usb/gadget_multi.txt @@ -36,7 +36,7 @@ configuration with CDC ECM which should work better under Linux. ** Windows host drivers -For the gadget two work under Windows two conditions have to be met: +For the gadget to work under Windows two conditions have to be met: *** Detecting as composite gadget diff --git a/Documentation/video4linux/CARDLIST.cx23885 b/Documentation/video4linux/CARDLIST.cx23885 index 85a8fdcfcdaa..c9b4959fd04e 100644 --- a/Documentation/video4linux/CARDLIST.cx23885 +++ b/Documentation/video4linux/CARDLIST.cx23885 @@ -54,3 +54,4 @@ 53 -> Hauppauge WinTV Starburst [0070:c12a] 54 -> ViewCast 260e [1576:0260] 55 -> ViewCast 460e [1576:0460] + 56 -> Hauppauge WinTV-quadHD (DVB) [0070:6a28,0070:6b28] diff --git a/Documentation/video4linux/v4l2-controls.txt b/Documentation/video4linux/v4l2-controls.txt index 5e759cab4538..f930b80e9111 100644 --- a/Documentation/video4linux/v4l2-controls.txt +++ b/Documentation/video4linux/v4l2-controls.txt @@ -96,21 +96,6 @@ Basic usage for V4L2 and sub-device drivers Where foo->sd is of type struct v4l2_subdev. - And set all core control ops in your struct v4l2_subdev_core_ops to these - helpers: - - .queryctrl = v4l2_subdev_queryctrl, - .querymenu = v4l2_subdev_querymenu, - .g_ctrl = v4l2_subdev_g_ctrl, - .s_ctrl = v4l2_subdev_s_ctrl, - .g_ext_ctrls = v4l2_subdev_g_ext_ctrls, - .try_ext_ctrls = v4l2_subdev_try_ext_ctrls, - .s_ext_ctrls = v4l2_subdev_s_ext_ctrls, - - Note: this is a temporary solution only. Once all V4L2 drivers that depend - on subdev drivers are converted to the control framework these helpers will - no longer be needed. - 1.4) Clean up the handler at the end: v4l2_ctrl_handler_free(&foo->ctrl_handler); diff --git a/Documentation/video4linux/vivid.txt b/Documentation/video4linux/vivid.txt index 8da5d2a576bc..1b26519c6ddc 100644 --- a/Documentation/video4linux/vivid.txt +++ b/Documentation/video4linux/vivid.txt @@ -74,7 +74,8 @@ Section 11: Cropping, Composing, Scaling Section 12: Formats Section 13: Capture Overlay Section 14: Output Overlay -Section 15: Some Future Improvements +Section 15: CEC (Consumer Electronics Control) +Section 16: Some Future Improvements Section 1: Configuring the driver @@ -364,7 +365,11 @@ For HDMI inputs it is possible to set the EDID. By default a simple EDID is provided. You can only set the EDID for HDMI inputs. Internally, however, the EDID is shared between all HDMI inputs. -No interpretation is done of the EDID data. +No interpretation is done of the EDID data with the exception of the +physical address. See the CEC section for more details. + +There is a maximum of 15 HDMI inputs (if there are more, then they will be +reduced to 15) since that's the limitation of the EDID physical address. Section 3: Video Output @@ -409,6 +414,9 @@ standard, and for all others a 1:1 pixel aspect ratio is returned. An HDMI output has a valid EDID which can be obtained through VIDIOC_G_EDID. +There is a maximum of 15 HDMI outputs (if there are more, then they will be +reduced to 15) since that's the limitation of the EDID physical address. See +also the CEC section for more details. Section 4: VBI Capture ---------------------- @@ -1108,7 +1116,26 @@ capabilities will slow down the video loop considerably as a lot of checks have to be done per pixel. 
-Section 15: Some Future Improvements +Section 15: CEC (Consumer Electronics Control) +---------------------------------------------- + +If there are HDMI inputs then a CEC adapter will be created that has +the same number of input ports. This is the equivalent of e.g. a TV that +has that number of inputs. Each HDMI output will also create a +CEC adapter that is hooked up to the corresponding input port, or (if there +are more outputs than inputs) is not hooked up at all. In other words, +this is the equivalent of hooking up each output device to an input port of +the TV. Any remaining output devices remain unconnected. + +The EDID that each output reads reports a unique CEC physical address that is +based on the physical address of the EDID of the input. So if the EDID of the +receiver has physical address A.B.0.0, then each output will see an EDID +containing physical address A.B.C.0 where C is 1 to the number of inputs. If +there are more outputs than inputs then the remaining outputs have a CEC adapter +that is disabled and reports an invalid physical address. + + +Section 16: Some Future Improvements ------------------------------------ Just as a reminder and in no particular order: @@ -1121,8 +1148,6 @@ Just as a reminder and in no particular order: - Fix sequence/field numbering when looping of video with alternate fields - Add support for V4L2_CID_BG_COLOR for video outputs - Add ARGB888 overlay support: better testing of the alpha channel -- Add custom DV timings support -- Add support for V4L2_DV_FL_REDUCED_FPS - Improve pixel aspect support in the tpg code by passing a real v4l2_fract - Use per-queue locks and/or per-device locks to improve throughput - Add support to loop from a specific output to a specific input across @@ -1133,3 +1158,4 @@ Just as a reminder and in no particular order: - Make a thread for the RDS generation, that would help in particular for the "Controls" RDS Rx I/O Mode as the read-only RDS controls could be updated in real-time. +- Changing the EDID should cause hotplug detect emulation to happen. diff --git a/Documentation/vm/page_migration b/Documentation/vm/page_migration index fea5c0864170..94bd9c11c4e0 100644 --- a/Documentation/vm/page_migration +++ b/Documentation/vm/page_migration @@ -142,5 +142,111 @@ Steps: 20. The new page is moved to the LRU and can be scanned by the swapper etc again. -Christoph Lameter, May 8, 2006. +C. Non-LRU page migration +------------------------- + +Although the original purpose of migration was to reduce the latency of +memory access on NUMA systems, compaction, which wants to create high-order +pages, is another main customer. + +The current problem with the implementation is that it is designed to migrate +only *LRU* pages. However, there are potential non-LRU pages which could be +migrated by drivers, for example, zsmalloc and virtio-balloon pages. + +For virtio-balloon pages, some parts of the migration code path have been +hooked up, and virtio-balloon specific functions were added to intercept the +migration logic. This is too specific to one driver, so other drivers that +want to make their pages movable would have to add their own specific hooks +to the migration path. + +To overcome the problem, the VM supports non-LRU page migration, which +provides generic functions for non-LRU movable pages without driver-specific +hooks in the migration path. + +If a driver wants to make its own pages movable, it should define three +functions, which are function pointers of struct address_space_operations. + +1.
bool (*isolate_page) (struct page *page, isolate_mode_t mode); + +What the VM expects of a driver's isolate_page function is to return *true* +if the driver isolates the page successfully. On returning true, the VM marks +the page as PG_isolated so that concurrent isolation on several CPUs skips the +page. If a driver cannot isolate the page, it should return *false*. + +Once a page is successfully isolated, the VM uses the page.lru fields, so the +driver shouldn't expect the values in those fields to be preserved. + +2. int (*migratepage) (struct address_space *mapping, + struct page *newpage, struct page *oldpage, enum migrate_mode); + +After isolation, the VM calls the driver's migratepage with the isolated page. +The job of migratepage is to move the contents of the old page to the new page +and to set up the struct page fields of newpage. Keep in mind that you should +indicate to the VM that the oldpage is no longer movable via __ClearPageMovable() +under page_lock if you migrated the oldpage successfully, and then return +MIGRATEPAGE_SUCCESS. If the driver cannot migrate the page at the moment, it +can return -EAGAIN. On -EAGAIN, the VM will retry page migration a short time +later because it interprets -EAGAIN as "temporary migration failure". On +returning any error other than -EAGAIN, the VM will give up on migrating the +page without retrying. + +The driver shouldn't touch the page.lru field while the VM is using it in +these functions. + +3. void (*putback_page)(struct page *); + +If migration fails on an isolated page, the VM must return the isolated page +to the driver, so it calls the driver's putback_page with the page whose +migration failed. In this function, the driver should put the isolated page +back into its own data structure. + +4. non-lru movable page flags + +There are two page flags for supporting non-LRU movable pages. + +* PG_movable + +The driver should use the function below to make a page movable, under +page_lock. + + void __SetPageMovable(struct page *page, struct address_space *mapping) + +It needs the address_space argument for registering the migration family of +functions which will be called by the VM. Strictly speaking, PG_movable is not +a real flag of struct page. Rather, the VM reuses the lower bits of +page->mapping to represent it: + + #define PAGE_MAPPING_MOVABLE 0x2 + page->mapping = page->mapping | PAGE_MAPPING_MOVABLE; + +so the driver shouldn't access page->mapping directly. Instead, the driver +should use page_mapping, which masks off the low two bits of page->mapping +under page lock, so it can get the right struct address_space. + +For testing whether a page is non-LRU movable, the VM supports the +__PageMovable function. However, it doesn't guarantee identification of +non-LRU movable pages because the page->mapping field is unified with other +variables in struct page. Also, if the driver releases the page after +isolation by the VM, page->mapping doesn't have a stable value even though it +has PAGE_MAPPING_MOVABLE set (look at __ClearPageMovable). But __PageMovable +is a cheap way to tell whether a page is LRU or non-LRU movable once the page +has been isolated, because LRU pages can never have PAGE_MAPPING_MOVABLE in +page->mapping. It is also good for just peeking at non-LRU movable pages +before the more expensive check with lock_page during pfn scanning to select +a victim. + +To guarantee a page is non-LRU movable, the VM provides the PageMovable +function. Unlike __PageMovable, PageMovable validates page->mapping and +mapping->a_ops->isolate_page under lock_page. The lock_page prevents sudden +destruction of page->mapping. + +A driver using __SetPageMovable should clear the flag via __ClearPageMovable +under page_lock before releasing the page.
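Putting the three callbacks together, a hedged sketch of what a driver's wiring might look like; the mydrv_* helpers are hypothetical placeholders and all locking and error handling is elided:

    #include <linux/fs.h>
    #include <linux/migrate.h>
    #include <linux/mm.h>

    /* hypothetical driver-private helpers, not real kernel APIs */
    extern bool mydrv_unlink_page(struct page *page);
    extern bool mydrv_copy_page(struct page *newpage, struct page *oldpage);
    extern void mydrv_link_page(struct page *page);

    static bool mydrv_isolate_page(struct page *page, isolate_mode_t mode)
    {
            /* unlink from driver-private lists; VM takes over page->lru */
            return mydrv_unlink_page(page);
    }

    static int mydrv_migratepage(struct address_space *mapping,
                                 struct page *newpage, struct page *oldpage,
                                 enum migrate_mode mode)
    {
            if (!mydrv_copy_page(newpage, oldpage))
                    return -EAGAIN;            /* VM will retry shortly */

            /* old page is done; clear under page_lock as described above */
            __ClearPageMovable(oldpage);
            return MIGRATEPAGE_SUCCESS;
    }

    static void mydrv_putback_page(struct page *page)
    {
            /* migration failed: take the page back */
            mydrv_link_page(page);
    }

    static const struct address_space_operations mydrv_aops = {
            .isolate_page  = mydrv_isolate_page,
            .migratepage   = mydrv_migratepage,
            .putback_page  = mydrv_putback_page,
    };

A page owned by such a driver would then be registered with __SetPageMovable(page, mapping) while holding the page lock, with mapping->a_ops pointing at mydrv_aops.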
+ +* PG_isolated + +To prevent concurrent isolation among several CPUs, the VM marks an isolated +page as PG_isolated under lock_page. So if a CPU encounters a PG_isolated +non-LRU movable page, it can skip it. The driver doesn't need to manipulate +the flag because the VM will set and clear it automatically. Keep in mind +that if the driver sees a PG_isolated page, it means the page has been +isolated by the VM, so it shouldn't touch the page.lru field. +PG_isolated is an alias of the PG_reclaim flag, so the driver shouldn't use +the flag for its own purposes. + +Christoph Lameter, May 8, 2006. +Minchan Kim, Mar 28, 2016. diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt index 7c871d6beb63..2ec6adb5a4ce 100644 --- a/Documentation/vm/transhuge.txt +++ b/Documentation/vm/transhuge.txt @@ -9,8 +9,8 @@ using huge pages for the backing of virtual memory with huge pages that supports the automatic promotion and demotion of page sizes and without the shortcomings of hugetlbfs. -Currently it only works for anonymous memory mappings but in the -future it can expand over the pagecache layer starting with tmpfs. +Currently it only works for anonymous memory mappings and tmpfs/shmem. +But in the future it can expand to other filesystems. The reason applications are running faster is because of two factors. The first factor is almost completely irrelevant and it's not @@ -57,10 +57,6 @@ miss is going to run faster. feature that applies to all dynamic high order allocations in the kernel) -- this initial support only offers the feature in the anonymous memory - regions but it'd be ideal to move it to tmpfs and the pagecache - later - Transparent Hugepage Support maximizes the usefulness of free memory if compared to the reservation approach of hugetlbfs by allowing all unused memory to be used as cache or other movable (or even unmovable @@ -94,21 +90,21 @@ madvise(MADV_HUGEPAGE) on their critical mmapped regions. == sysfs == -Transparent Hugepage Support can be entirely disabled (mostly for -debugging purposes) or only enabled inside MADV_HUGEPAGE regions (to -avoid the risk of consuming more memory resources) or enabled system -wide. This can be achieved with one of: +Transparent Hugepage Support for anonymous memory can be entirely disabled +(mostly for debugging purposes) or only enabled inside MADV_HUGEPAGE +regions (to avoid the risk of consuming more memory resources) or enabled +system wide. This can be achieved with one of: echo always >/sys/kernel/mm/transparent_hugepage/enabled echo madvise >/sys/kernel/mm/transparent_hugepage/enabled echo never >/sys/kernel/mm/transparent_hugepage/enabled It's also possible to limit defrag efforts in the VM to generate -hugepages in case they're not immediately free to madvise regions or -to never try to defrag memory and simply fallback to regular pages -unless hugepages are immediately available. Clearly if we spend CPU -time to defrag memory, we would expect to gain even more by the fact -we use hugepages later instead of regular pages. This isn't always +anonymous hugepages in case they're not immediately free to madvise +regions or to never try to defrag memory and simply fallback to regular +pages unless hugepages are immediately available. Clearly if we spend CPU +time to defrag memory, we would expect to gain even more by the fact we +use hugepages later instead of regular pages. This isn't always guaranteed, but it may be more likely in case the allocation is for a MADV_HUGEPAGE region. @@ -133,9 +129,9 @@ that have used madvise(MADV_HUGEPAGE).
This is the default behaviour. "never" should be self-explanatory. -By default kernel tries to use huge zero page on read page fault. -It's possible to disable huge zero page by writing 0 or enable it -back by writing 1: +By default kernel tries to use huge zero page on read page fault to +anonymous mapping. It's possible to disable huge zero page by writing 0 +or enable it back by writing 1: echo 0 >/sys/kernel/mm/transparent_hugepage/use_zero_page echo 1 >/sys/kernel/mm/transparent_hugepage/use_zero_page @@ -204,21 +200,67 @@ Support by passing the parameter "transparent_hugepage=always" or "transparent_hugepage=madvise" or "transparent_hugepage=never" (without "") to the kernel command line. +== Hugepages in tmpfs/shmem == + +You can control hugepage allocation policy in tmpfs with mount option +"huge=". It can have following values: + + - "always": + Attempt to allocate huge pages every time we need a new page; + + - "never": + Do not allocate huge pages; + + - "within_size": + Only allocate huge page if it will be fully within i_size. + Also respect fadvise()/madvise() hints; + + - "advise": + Only allocate huge pages if requested with fadvise()/madvise(); + +The default policy is "never". + +"mount -o remount,huge= /mountpoint" works fine after mount: remounting +huge=never will not attempt to break up huge pages at all, just stop more +from being allocated. + +There's also a sysfs knob to control hugepage allocation policy for the +internal shmem mount: /sys/kernel/mm/transparent_hugepage/shmem_enabled. The +mount is used for SysV SHM, memfds, shared anonymous mmaps (of /dev/zero or +MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem. + +In addition to policies listed above, shmem_enabled allows two further +values: + + - "deny": + For use in emergencies, to force the huge option off from + all mounts; + - "force": + Force the huge option on for all - very useful for testing; + == Need of application restart == -The transparent_hugepage/enabled values only affect future -behavior. So to make them effective you need to restart any -application that could have been using hugepages. This also applies to -the regions registered in khugepaged. +The transparent_hugepage/enabled values and tmpfs mount option only affect +future behavior. So to make them effective you need to restart any +application that could have been using hugepages. This also applies to the +regions registered in khugepaged. == Monitoring usage == -The number of transparent huge pages currently used by the system is -available by reading the AnonHugePages field in /proc/meminfo. To -identify what applications are using transparent huge pages, it is -necessary to read /proc/PID/smaps and count the AnonHugePages fields -for each mapping. Note that reading the smaps file is expensive and -reading it frequently will incur overhead. +The number of anonymous transparent huge pages currently used by the +system is available by reading the AnonHugePages field in /proc/meminfo. +To identify what applications are using anonymous transparent huge pages, +it is necessary to read /proc/PID/smaps and count the AnonHugePages fields +for each mapping. + +The number of file transparent huge pages mapped to userspace is available +by reading ShmemPmdMapped and ShmemHugePages fields in /proc/meminfo. +To identify what applications are mapping file transparent huge pages, it +is necessary to read /proc/PID/smaps and count the FileHugeMapped fields +for each mapping.
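As a quick illustration, the meminfo counters named above can also be read programmatically; a minimal userspace sketch (the field names are as documented, everything else is illustrative):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/meminfo", "r");

            if (!f)
                    return 1;
            /* print only the THP-related fields discussed above */
            while (fgets(line, sizeof(line), f))
                    if (strstr(line, "AnonHugePages") ||
                        strstr(line, "ShmemHugePages") ||
                        strstr(line, "ShmemPmdMapped"))
                            fputs(line, stdout);
            fclose(f);
            return 0;
    }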
+ +Note that reading the smaps file is expensive and reading it +frequently will incur overhead. There are a number of counters in /proc/vmstat that may be used to monitor how successfully the system is providing huge pages for use. @@ -238,6 +280,12 @@ thp_collapse_alloc_failed is incremented if khugepaged found a range of pages that should be collapsed into one huge page but failed the allocation. +thp_file_alloc is incremented every time a file huge page is successfully + allocated. + +thp_file_mapped is incremented every time a file huge page is mapped into + user address space. + thp_split_page is incremented every time a huge page is split into base pages. This can happen for a variety of reasons but a common reason is that a huge page is old and is being reclaimed. @@ -403,19 +451,27 @@ pages: on relevant sub-page of the compound page. - map/unmap of the whole compound page accounted in compound_mapcount - (stored in first tail page). + (stored in first tail page). For file huge pages, we also increment + ->_mapcount of all sub-pages in order to have race-free detection of + last unmap of subpages. -PageDoubleMap() indicates that ->_mapcount in all subpages is offset up by one. -This additional reference is required to get race-free detection of unmap of -subpages when we have them mapped with both PMDs and PTEs. +PageDoubleMap() indicates that the page is *possibly* mapped with PTEs. + +For anonymous pages PageDoubleMap() also indicates ->_mapcount in all +subpages is offset up by one. This additional reference is required to +get race-free detection of unmap of subpages when we have them mapped with +both PMDs and PTEs. This is an optimization required to lower the overhead of per-subpage mapcount tracking. The alternative is to alter ->_mapcount in all subpages on each map/unmap of the whole compound page. -We set PG_double_map when a PMD of the page got split for the first time, -but still have PMD mapping. The additional references go away with last -compound_mapcount. +For anonymous pages, we set PG_double_map when a PMD of the page got split +for the first time, but still have PMD mapping. The additional references +go away with last compound_mapcount. + +File pages get PG_double_map set on the first map of the page with PTE, and +it goes away when the page gets evicted from the page cache. split_huge_page internally has to distribute the refcounts in the head page to the tail pages before clearing all PG_head/tail bits from the page @@ -427,7 +483,7 @@ sum of mapcount of all sub-pages plus one (split_huge_page caller must have reference for head page). split_huge_page uses migration entries to stabilize page->_refcount and -page->_mapcount. +page->_mapcount of anonymous pages. File pages just get unmapped. We are safe against physical memory scanners too: the only legitimate way a scanner can get a reference to a page is get_page_unless_zero(). diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt index fa3b527086fa..0026a8d33fc0 100644 --- a/Documentation/vm/unevictable-lru.txt +++ b/Documentation/vm/unevictable-lru.txt @@ -461,6 +461,27 @@ unevictable LRU is enabled, the work of compaction is mostly handled by the page migration code and the same work flow as described in MIGRATING MLOCKED PAGES will apply. +MLOCKING TRANSPARENT HUGE PAGES +------------------------------- + +A transparent huge page is represented by a single entry on an LRU list. +Therefore, we can only make unevictable an entire compound page, not +individual subpages.
+ +If a user tries to mlock() part of a huge page, we want the rest of the +page to be reclaimable. + +We cannot just split the page on partial mlock() as split_huge_page() can +fail, and a new intermittent failure mode for the syscall is undesirable. + +We handle this by keeping PTE-mapped huge pages on normal LRU lists: the +PMD on the border of a VM_LOCKED VMA will be split into a PTE table. + +This way the huge page is accessible for vmscan. Under memory pressure the +page will be split, subpages which belong to VM_LOCKED VMAs will be moved +to the unevictable LRU and the rest can be reclaimed. + +See also the comment in follow_trans_huge_pmd(). mmap(MAP_LOCKED) SYSTEM CALL HANDLING ------------------------------------- diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt index 5e0e05c5183e..c49e3178178d 100644 --- a/Documentation/workqueue.txt +++ b/Documentation/workqueue.txt @@ -169,7 +169,7 @@ resources, scheduled and executed. WQ_UNBOUND Work items queued to an unbound wq are served by the special - woker-pools which host workers which are not bound to any + worker-pools which host workers which are not bound to any specific CPU. This makes the wq behave as a simple execution context provider without concurrency management. The unbound worker-pools try to start execution of work items as soon as diff --git a/Documentation/x86/intel_mpx.txt b/Documentation/x86/intel_mpx.txt index 1a5a12184a35..85d0549ad846 100644 --- a/Documentation/x86/intel_mpx.txt +++ b/Documentation/x86/intel_mpx.txt @@ -45,7 +45,7 @@ is how we expect the compiler, application and kernel to work together. MPX-instrumented. 3) The kernel detects that the CPU has MPX, allows the new prctl() to succeed, and notes the location of the bounds directory. Userspace is - expected to keep the bounds directory at that locationWe note it + expected to keep the bounds directory at that location. We note it instead of reading it each time because the 'xsave' operation needed to access the bounds directory register is an expensive operation. 4) If the application needs to spill bounds out of the 4 registers, it @@ -167,7 +167,7 @@ If a #BR is generated due to a bounds violation caused by MPX. We need to decode MPX instructions to get violation address and set this address into extended struct siginfo. -The _sigfault feild of struct siginfo is extended as follow: +The _sigfault field of struct siginfo is extended as follows: 87 /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ 88 struct { @@ -240,5 +240,5 @@ them at the same bounds table. This is allowed architecturally. See more information "Intel(R) Architecture Instruction Set Extensions Programming Reference" (9.3.4). -However, if users did this, the kernel might be fooled in to unmaping an +However, if users did this, the kernel might be fooled into unmapping an in-use bounds table since it does not recognize sharing. diff --git a/Documentation/x86/tlb.txt b/Documentation/x86/tlb.txt index 39d172326703..6a0607b99ed8 100644 --- a/Documentation/x86/tlb.txt +++ b/Documentation/x86/tlb.txt @@ -5,7 +5,7 @@ memory, it has two choices: from areas other than the one we are trying to flush will be destroyed and must be refilled later, at some cost. 2. Use the invlpg instruction to invalidate a single page at a - time. This could potentialy cost many more instructions, but + time. This could potentially cost many more instructions, but it is a much more precise operation, causing no collateral damage to other TLB entries. @@ -19,7 +19,7 @@ Which method to use depends on a few things: work. 3.
The size of the TLB. The larger the TLB, the more collateral damage we do with a full flush. So, the larger the TLB, the - more attrative an individual flush looks. Data and + more attractive an individual flush looks. Data and instructions have separate TLBs, as do different page sizes. 4. The microarchitecture. The TLB has become a multi-level cache on modern CPUs, and the global flushes have become more diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck index b1fb30273286..d0648a74fceb 100644 --- a/Documentation/x86/x86_64/machinecheck +++ b/Documentation/x86/x86_64/machinecheck @@ -36,7 +36,7 @@ between all CPUs. check_interval How often to poll for corrected machine check errors, in seconds - (Note output is hexademical). Default 5 minutes. When the poller + (Note output is hexadecimal). Default 5 minutes. When the poller finds MCEs it triggers an exponential speedup (poll more often) on the polling interval. When the poller stops finding MCEs, it triggers an exponential backoff (poll less often) on the polling diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 5aa738346062..8c7dd5957ae1 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -39,4 +39,8 @@ memory window (this size is arbitrary, it can be raised later if needed). The mappings are not part of any other kernel PGD and are only available during EFI runtime calls. +Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all +physical memory, vmalloc/ioremap space and virtual memory map are randomized. +Their order is preserved but their base will be offset early at boot time. + -Andi Kleen, Jul 2004 diff --git a/Documentation/zh_CN/CodingStyle b/Documentation/zh_CN/CodingStyle index 654afd72eb24..12717791baac 100644 --- a/Documentation/zh_CN/CodingStyle +++ b/Documentation/zh_CN/CodingStyle @@ -24,34 +24,33 @@ Documentation/CodingStyle的中文翻译 Linux内核代码风格 -这是一个简短的文档,描述了linux内核的首选代码风格。代码风格是因人而异的,而且我 -不愿意把我的观点强加给任何人,不过这里所讲述的是我必须要维护的代码所遵守的风格, -并且我也希望绝大多数其他代码也能遵守这个风格。请在写代码时至少考虑一下本文所述的 -风格。 +这是一个简短的文档,描述了 linux 内核的首选代码风格。代码风格是因人而异的,而且我 +不愿意把自己的观点强加给任何人,但这就像我去做任何事情都必须遵循的原则那样,我也 +希望在绝大多数事上保持这种的态度。请(在写代码时)至少考虑一下这里的代码风格。 -首先,我建议你打印一份GNU代码规范,然后不要读它。烧了它,这是一个具有重大象征性 -意义的动作。 +首先,我建议你打印一份 GNU 代码规范,然后不要读。烧了它,这是一个具有重大象征性意义 +的动作。 不管怎样,现在我们开始: - 第一章:缩进 + 第一章:缩进 -制表符是8个字符,所以缩进也是8个字符。有些异端运动试图将缩进变为4(乃至2)个字符 -深,这几乎相当于尝试将圆周率的值定义为3。 +制表符是 8 个字符,所以缩进也是 8 个字符。有些异端运动试图将缩进变为 4(甚至 2!) 
+个字符深,这几乎相当于尝试将圆周率的值定义为 3。 理由:缩进的全部意义就在于清楚的定义一个控制块起止于何处。尤其是当你盯着你的屏幕 -连续看了20小时之后,你将会发现大一点的缩进会使你更容易分辨缩进。 +连续看了 20 小时之后,你将会发现大一点的缩进会使你更容易分辨缩进。 -现在,有些人会抱怨8个字符的缩进会使代码向右边移动的太远,在80个字符的终端屏幕上 -就很难读这样的代码。这个问题的答案是,如果你需要3级以上的缩进,不管用何种方式你 +现在,有些人会抱怨 8 个字符的缩进会使代码向右边移动的太远,在 80 个字符的终端屏幕上 +就很难读这样的代码。这个问题的答案是,如果你需要 3 级以上的缩进,不管用何种方式你 的代码已经有问题了,应该修正你的程序。 -简而言之,8个字符的缩进可以让代码更容易阅读,还有一个好处是当你的函数嵌套太深的 +简而言之,8 个字符的缩进可以让代码更容易阅读,还有一个好处是当你的函数嵌套太深的 时候可以给你警告。留心这个警告。 -在switch语句中消除多级缩进的首选的方式是让“switch”和从属于它的“case”标签对齐于同 -一列,而不要“两次缩进”“case”标签。比如: +在 switch 语句中消除多级缩进的首选的方式是让 “switch” 和从属于它的 “case” 标签 +对齐于同一列,而不要 “两次缩进” “case” 标签。比如: switch (suffix) { case 'G': @@ -70,7 +69,6 @@ Documentation/CodingStyle的中文翻译 break; } - 不要把多个语句放在一行里,除非你有什么东西要隐藏: if (condition) do_this; @@ -79,7 +77,7 @@ Documentation/CodingStyle的中文翻译 也不要在一行里放多个赋值语句。内核代码风格超级简单。就是避免可能导致别人误读的表 达式。 -除了注释、文档和Kconfig之外,不要使用空格来缩进,前面的例子是例外,是有意为之。 +除了注释、文档和 Kconfig 之外,不要使用空格来缩进,前面的例子是例外,是有意为之。 选用一个好的编辑器,不要在行尾留空格。 @@ -88,27 +86,18 @@ Documentation/CodingStyle的中文翻译 代码风格的意义就在于使用平常使用的工具来维持代码的可读性和可维护性。 -每一行的长度的限制是80列,我们强烈建议您遵守这个惯例。 +每一行的长度的限制是 80 列,我们强烈建议您遵守这个惯例。 -长于80列的语句要打散成有意义的片段。每个片段要明显短于原来的语句,而且放置的位置 -也明显的靠右。同样的规则也适用于有很长参数列表的函数头。长字符串也要打散成较短的 -字符串。唯一的例外是超过80列可以大幅度提高可读性并且不会隐藏信息的情况。 - -void fun(int a, int b, int c) -{ - if (condition) - printk(KERN_WARNING "Warning this is a long printk with " - "3 parameters a: %u b: %u " - "c: %u \n", a, b, c); - else - next_statement; -} +长于 80 列的语句要打散成有意义的片段。除非超过 80 列能显著增加可读性,并且不会隐藏 +信息。子片段要明显短于母片段,并明显靠右。这同样适用于有着很长参数列表的函数头。 +然而,绝对不要打散对用户可见的字符串,例如 printk 信息,因为这将导致无法 grep 这些 +信息。 第三章:大括号和空格的放置 C语言风格中另外一个常见问题是大括号的放置。和缩进大小不同,选择或弃用某种放置策 -略并没有多少技术上的原因,不过首选的方式,就像Kernighan和Ritchie展示给我们的,是 -把起始大括号放在行尾,而把结束大括号放在行首,所以: +略并没有多少技术上的原因,不过首选的方式,就像 Kernighan 和 Ritchie 展示给我们的, +是把起始大括号放在行尾,而把结束大括号放在行首,所以: if (x is true) { we do y @@ -134,12 +123,12 @@ C语言风格中另外一个常见问题是大括号的放置。和缩进大小 body of function } -全世界的异端可能会抱怨这个不一致性是……呃……不一致的,不过所有思维健全的人都知道( -a)K&R是_正确的_,并且(b)K&R是正确的。此外,不管怎样函数都是特殊的(在C语言中 -,函数是不能嵌套的)。 +全世界的异端可能会抱怨这个不一致性是……呃……不一致的,不过所有思维健全的人都知道 +(a) K&R 是 _正确的_,并且 (b) K&R 是正确的。此外,不管怎样函数都是特殊的(C +函数是不能嵌套的)。 -注意结束大括号独自占据一行,除非它后面跟着同一个语句的剩余部分,也就是do语句中的 -“while”或者if语句中的“else”,像这样: +注意结束大括号独自占据一行,除非它后面跟着同一个语句的剩余部分,也就是 do 语句中的 +“while” 或者 if 语句中的 “else”,像这样: do { body of do-loop @@ -158,41 +147,50 @@ a)K&R是_正确的_,并且(b)K&R是正确的。此外,不管怎样函 理由:K&R。 也请注意这种大括号的放置方式也能使空(或者差不多空的)行的数量最小化,同时不失可 -读性。因此,由于你的屏幕上的新行是不可再生资源(想想25行的终端屏幕),你将会有更 +读性。因此,由于你的屏幕上的新行是不可再生资源(想想 25 行的终端屏幕),你将会有更 多的空行来放置注释。 当只有一个单独的语句的时候,不用加不必要的大括号。 -if (condition) - action(); + if (condition) + action(); + +和 + + if (condition) + do_this(); + else + do_that(); -这点不适用于本身为某个条件语句的一个分支的单独语句。这时需要在两个分支里都使用大 -括号。 +这并不适用于只有一个条件分支是单语句的情况;这时所有分支都要使用大括号: -if (condition) { - do_this(); - do_that(); -} else { - otherwise(); -} + if (condition) { + do_this(); + do_that(); + } else { + otherwise(); + } 3.1:空格 -Linux内核的空格使用方式(主要)取决于它是用于函数还是关键字。(大多数)关键字后 -要加一个空格。值得注意的例外是sizeof、typeof、alignof和__attribute__,这些关键字 -某些程度上看起来更像函数(它们在Linux里也常常伴随小括号而使用,尽管在C语言里这样 -的小括号不是必需的,就像“struct fileinfo info”声明过后的“sizeof info”)。 +Linux 内核的空格使用方式(主要)取决于它是用于函数还是关键字。(大多数)关键字后 +要加一个空格。值得注意的例外是 sizeof、typeof、alignof 和 __attribute__,这些 +关键字某些程度上看起来更像函数(它们在 Linux 里也常常伴随小括号而使用,尽管在 C 里 +这样的小括号不是必需的,就像 “struct fileinfo info” 声明过后的 “sizeof info”)。 所以在这些关键字之后放一个空格: + if, switch, case, for, do, while -但是不要在sizeof、typeof、alignof或者__attribute__这些关键字之后放空格。例如, + +但是不要在 sizeof、typeof、alignof 或者 __attribute__ 这些关键字之后放空格。例如, + s = sizeof(struct file); 不要在小括号里的表达式两侧加空格。这是一个反例: s = sizeof( struct file ); -当声明指针类型或者返回指针类型的函数时,“*”的首选使用方式是使之靠近变量名或者函 
+当声明指针类型或者返回指针类型的函数时,“*” 的首选使用方式是使之靠近变量名或者函 数名,而不是靠近类型名。例子: char *linux_banner; @@ -204,15 +202,18 @@ Linux内核的空格使用方式(主要)取决于它是用于函数还是关 = + - < > * / % | & ^ <= >= == != ? : 但是一元操作符后不要加空格: + & * + - ~ ! sizeof typeof alignof __attribute__ defined 后缀自加和自减一元操作符前不加空格: + ++ -- 前缀自加和自减一元操作符后不加空格: + ++ -- -“.”和“->”结构体成员操作符前后不加空格。 +‘.’ 和 “->” 结构体成员操作符前后不加空格。 不要在行尾留空白。有些可以自动缩进的编辑器会在新行的行首加入适量的空白,然后你 就可以直接在那一行输入代码。不过假如你最后没有在那一行输入代码,有些编辑器就不 @@ -225,23 +226,23 @@ Linux内核的空格使用方式(主要)取决于它是用于函数还是关 第四章:命名 -C是一个简朴的语言,你的命名也应该这样。和Modula-2和Pascal程序员不同,C程序员不使 -用类似ThisVariableIsATemporaryCounter这样华丽的名字。C程序员会称那个变量为“tmp” -,这样写起来会更容易,而且至少不会令其难于理解。 +C是一个简朴的语言,你的命名也应该这样。和 Modula-2 和 Pascal 程序员不同,C 程序员 +不使用类似 ThisVariableIsATemporaryCounter 这样华丽的名字。C 程序员会称那个变量 +为 “tmp”,这样写起来会更容易,而且至少不会令其难于理解。 不过,虽然混用大小写的名字是不提倡使用的,但是全局变量还是需要一个具描述性的名字 -。称一个全局函数为“foo”是一个难以饶恕的错误。 +。称一个全局函数为 “foo” 是一个难以饶恕的错误。 全局变量(只有当你真正需要它们的时候再用它)需要有一个具描述性的名字,就像全局函 -数。如果你有一个可以计算活动用户数量的函数,你应该叫它“count_active_users()”或者 -类似的名字,你不应该叫它“cntuser()”。 +数。如果你有一个可以计算活动用户数量的函数,你应该叫它 “count_active_users()” +或者类似的名字,你不应该叫它 “cntuser()”。 在函数名中包含函数类型(所谓的匈牙利命名法)是脑子出了问题——编译器知道那些类型而 且能够检查那些类型,这样做只能把程序员弄糊涂了。难怪微软总是制造出有问题的程序。 本地变量名应该简短,而且能够表达相关的含义。如果你有一些随机的整数型的循环计数器 -,它应该被称为“i”。叫它“loop_counter”并无益处,如果它没有被误解的可能的话。类似 -的,“tmp”可以用来称呼任意类型的临时变量。 +,它应该被称为 “i”。叫它 “loop_counter” 并无益处,如果它没有被误解的可能的话。 +类似的,“tmp” 可以用来称呼任意类型的临时变量。 如果你怕混淆了你的本地变量名,你就遇到另一个问题了,叫做函数增长荷尔蒙失衡综合症 。请看第六章(函数)。 @@ -249,9 +250,9 @@ C是一个简朴的语言,你的命名也应该这样。和Modula-2和Pascal 第五章:Typedef -不要使用类似“vps_t”之类的东西。 +不要使用类似 “vps_t” 之类的东西。 -对结构体和指针使用typedef是一个错误。当你在代码里看到: +对结构体和指针使用 typedef 是一个错误。当你在代码里看到: vps_t a; @@ -261,91 +262,91 @@ C是一个简朴的语言,你的命名也应该这样。和Modula-2和Pascal struct virtual_container *a; -你就知道“a”是什么了。 +你就知道 “a” 是什么了。 -很多人认为typedef“能提高可读性”。实际不是这样的。它们只在下列情况下有用: +很多人认为 typedef “能提高可读性”。实际不是这样的。它们只在下列情况下有用: - (a) 完全不透明的对象(这种情况下要主动使用typedef来隐藏这个对象实际上是什么)。 + (a) 完全不透明的对象(这种情况下要主动使用 typedef 来隐藏这个对象实际上是什么)。 - 例如:“pte_t”等不透明对象,你只能用合适的访问函数来访问它们。 + 例如:“pte_t” 等不透明对象,你只能用合适的访问函数来访问它们。 - 注意!不透明性和“访问函数”本身是不好的。我们使用pte_t等类型的原因在于真的是 + 注意!不透明性和“访问函数”本身是不好的。我们使用 pte_t 等类型的原因在于真的是 完全没有任何共用的可访问信息。 - (b) 清楚的整数类型,如此,这层抽象就可以帮助消除到底是“int”还是“long”的混淆。 + (b) 清楚的整数类型,如此,这层抽象就可以帮助消除到底是 “int” 还是 “long” 的混淆。 - u8/u16/u32是完全没有问题的typedef,不过它们更符合类别(d)而不是这里。 + u8/u16/u32 是完全没有问题的 typedef,不过它们更符合类别 (d) 而不是这里。 - 再次注意!要这样做,必须事出有因。如果某个变量是“unsigned long“,那么没有必要 + 再次注意!要这样做,必须事出有因。如果某个变量是 “unsigned long“,那么没有必要 typedef unsigned long myflags_t; - 不过如果有一个明确的原因,比如它在某种情况下可能会是一个“unsigned int”而在 - 其他情况下可能为“unsigned long”,那么就不要犹豫,请务必使用typedef。 + 不过如果有一个明确的原因,比如它在某种情况下可能会是一个 “unsigned int” 而在 + 其他情况下可能为 “unsigned long”,那么就不要犹豫,请务必使用 typedef。 (c) 当你使用sparse按字面的创建一个新类型来做类型检查的时候。 (d) 和标准C99类型相同的类型,在某些例外的情况下。 - 虽然让眼睛和脑筋来适应新的标准类型比如“uint32_t”不需要花很多时间,可是有些 + 虽然让眼睛和脑筋来适应新的标准类型比如 “uint32_t” 不需要花很多时间,可是有些 人仍然拒绝使用它们。 - 因此,Linux特有的等同于标准类型的“u8/u16/u32/u64”类型和它们的有符号类型是被 + 因此,Linux 特有的等同于标准类型的 “u8/u16/u32/u64” 类型和它们的有符号类型是被 允许的——尽管在你自己的新代码中,它们不是强制要求要使用的。 当编辑已经使用了某个类型集的已有代码时,你应该遵循那些代码中已经做出的选择。 (e) 可以在用户空间安全使用的类型。 - 在某些用户空间可见的结构体里,我们不能要求C99类型而且不能用上面提到的“u32” - 类型。因此,我们在与用户空间共享的所有结构体中使用__u32和类似的类型。 + 在某些用户空间可见的结构体里,我们不能要求C99类型而且不能用上面提到的 “u32” + 类型。因此,我们在与用户空间共享的所有结构体中使用 __u32 和类似的类型。 -可能还有其他的情况,不过基本的规则是永远不要使用typedef,除非你可以明确的应用上 +可能还有其他的情况,不过基本的规则是永远不要使用 typedef,除非你可以明确的应用上 述某个规则中的一个。 总的来说,如果一个指针或者一个结构体里的元素可以合理的被直接访问到,那么它们就不 -应该是一个typedef。 +应该是一个 typedef。 第六章:函数 函数应该简短而漂亮,并且只完成一件事情。函数应该可以一屏或者两屏显示完(我们都知 -道ISO/ANSI屏幕大小是80x24),只做一件事情,而且把它做好。 +道 ISO/ANSI 屏幕大小是 80x24),只做一件事情,而且把它做好。 一个函数的最大长度是和该函数的复杂度和缩进级数成反比的。所以,如果你有一个理论上 -很简单的只有一个很长(但是简单)的case语句的函数,而且你需要在每个case里做很多很 -小的事情,这样的函数尽管很长,但也是可以的。 
+很简单的只有一个很长(但是简单)的 case 语句的函数,而且你需要在每个 case 里做 +很多很小的事情,这样的函数尽管很长,但也是可以的。 不过,如果你有一个复杂的函数,而且你怀疑一个天分不是很高的高中一年级学生可能甚至 搞不清楚这个函数的目的,你应该严格的遵守前面提到的长度限制。使用辅助函数,并为之 取个具描述性的名字(如果你觉得它们的性能很重要的话,可以让编译器内联它们,这样的 效果往往会比你写一个复杂函数的效果要好。) -函数的另外一个衡量标准是本地变量的数量。此数量不应超过5-10个,否则你的函数就有 +函数的另外一个衡量标准是本地变量的数量。此数量不应超过 5-10 个,否则你的函数就有 问题了。重新考虑一下你的函数,把它分拆成更小的函数。人的大脑一般可以轻松的同时跟 -踪7个不同的事物,如果再增多的话,就会糊涂了。即便你聪颖过人,你也可能会记不清你2 -个星期前做过的事情。 +踪 7 个不同的事物,如果再增多的话,就会糊涂了。即便你聪颖过人,你也可能会记不清你 +2 个星期前做过的事情。 -在源文件里,使用空行隔开不同的函数。如果该函数需要被导出,它的EXPORT*宏应该紧贴 +在源文件里,使用空行隔开不同的函数。如果该函数需要被导出,它的 EXPORT* 宏应该紧贴 在它的结束大括号之下。比如: -int system_is_up(void) -{ - return system_state == SYSTEM_RUNNING; -} -EXPORT_SYMBOL(system_is_up); + int system_is_up(void) + { + return system_state == SYSTEM_RUNNING; + } + EXPORT_SYMBOL(system_is_up); -在函数原型中,包含函数名和它们的数据类型。虽然C语言里没有这样的要求,在Linux里这 +在函数原型中,包含函数名和它们的数据类型。虽然C语言里没有这样的要求,在 Linux 里这 是提倡的做法,因为这样可以很简单的给读者提供更多的有价值的信息。 第七章:集中的函数退出途径 -虽然被某些人声称已经过时,但是goto语句的等价物还是经常被编译器所使用,具体形式是 +虽然被某些人声称已经过时,但是 goto 语句的等价物还是经常被编译器所使用,具体形式是 无条件跳转指令。 -当一个函数从多个位置退出并且需要做一些通用的清理工作的时候,goto的好处就显现出来 -了。 +当一个函数从多个位置退出,并且需要做一些类似清理的常见操作时,goto 语句就很方便了。 +如果并不需要清理操作,那么直接 return 即可。 理由是: @@ -354,26 +355,37 @@ EXPORT_SYMBOL(system_is_up); - 可以避免由于修改时忘记更新某个单独的退出点而导致的错误 - 减轻了编译器的工作,无需删除冗余代码;) -int fun(int a) -{ - int result = 0; - char *buffer = kmalloc(SIZE); - - if (buffer == NULL) - return -ENOMEM; - - if (condition1) { - while (loop1) { - ... + int fun(int a) + { + int result = 0; + char *buffer; + + buffer = kmalloc(SIZE, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + if (condition1) { + while (loop1) { + ... + } + result = 1; + goto out_buffer; } - result = 1; - goto out; + ... + out_buffer: + kfree(buffer); + return result; } - ... -out: - kfree(buffer); - return result; -} + +一个需要注意的常见错误是“一个 err 错误”,就像这样: + + err: + kfree(foo->bar); + kfree(foo); + return ret; + +这段代码的错误是,在某些退出路径上 “foo” 是 NULL。通常情况下,通过把它分离成两个 +错误标签 “err_bar:” 和 “err_foo:” 来修复这个错误。 第八章:注释 @@ -386,10 +398,10 @@ out: 加太多。你应该做的,是把注释放在函数的头部,告诉人们它做了什么,也可以加上它做这 些事情的原因。 -当注释内核API函数时,请使用kernel-doc格式。请看 -Documentation/kernel-doc-nano-HOWTO.txt和scripts/kernel-doc以获得详细信息。 +当注释内核API函数时,请使用 kernel-doc 格式。请看 +Documentation/kernel-documentation.rst和scripts/kernel-doc 以获得详细信息。 -Linux的注释风格是C89“/* ... */”风格。不要使用C99风格“// ...”注释。 +Linux的注释风格是 C89 “/* ... */” 风格。不要使用 C99 风格 “// ...” 注释。 长(多行)的首选注释风格是: @@ -402,6 +414,15 @@ Linux的注释风格是C89“/* ... */”风格。不要使用C99风格“// ... * with beginning and ending almost-blank lines. */ +对于在 net/ 和 drivers/net/ 的文件,首选的长(多行)注释风格有些不同。 + + /* The preferred comment style for files in net/ and drivers/net + * looks like this. + * + * It is nearly the same as the generally preferred comment style, + * but there is no initial almost-blank line. + */ + 注释数据也是很重要的,不管是基本类型还是衍生类型。为了方便实现这一点,每一行应只 声明一个数据(不要使用逗号来一次声明多个数据)。这样你就有空间来为每个数据写一段 小注释来解释它们的用途了。 @@ -409,49 +430,63 @@ Linux的注释风格是C89“/* ... */”风格。不要使用C99风格“// ... 第九章:你已经把事情弄糟了 -这没什么,我们都是这样。可能你的使用了很长时间Unix的朋友已经告诉你“GNU emacs”能 -自动帮你格式化C源代码,而且你也注意到了,确实是这样,不过它所使用的默认值和我们 -想要的相去甚远(实际上,甚至比随机打的还要差——无数个猴子在GNU emacs里打字永远不 -会创造出一个好程序)(译注:请参考Infinite Monkey Theorem) - -所以你要么放弃GNU emacs,要么改变它让它使用更合理的设定。要采用后一个方案,你可 -以把下面这段粘贴到你的.emacs文件里。 - -(defun linux-c-mode () - "C mode with adjusted defaults for use with the Linux kernel." 
- (interactive) - (c-mode) - (c-set-style "K&R") - (setq tab-width 8) - (setq indent-tabs-mode t) - (setq c-basic-offset 8)) - -这样就定义了M-x linux-c-mode命令。当你hack一个模块的时候,如果你把字符串 --*- linux-c -*-放在头两行的某个位置,这个模式将会被自动调用。如果你希望在你修改 -/usr/src/linux里的文件时魔术般自动打开linux-c-mode的话,你也可能需要添加 - -(setq auto-mode-alist (cons '("/usr/src/linux.*/.*\\.[ch]$" . linux-c-mode) - auto-mode-alist)) - -到你的.emacs文件里。 - -不过就算你尝试让emacs正确的格式化代码失败了,也并不意味着你失去了一切:还可以用“ -indent”。 - -不过,GNU indent也有和GNU emacs一样有问题的设定,所以你需要给它一些命令选项。不 -过,这还不算太糟糕,因为就算是GNU indent的作者也认同K&R的权威性(GNU的人并不是坏 -人,他们只是在这个问题上被严重的误导了),所以你只要给indent指定选项“-kr -i8” -(代表“K&R,8个字符缩进”),或者使用“scripts/Lindent”,这样就可以以最时髦的方式 +这没什么,我们都是这样。可能你的使用了很长时间 Unix 的朋友已经告诉你 “GNU emacs” 能 +自动帮你格式化 C 源代码,而且你也注意到了,确实是这样,不过它所使用的默认值和我们 +想要的相去甚远(实际上,甚至比随机打的还要差——无数个猴子在 GNU emacs 里打字永远不 +会创造出一个好程序)(译注:请参考 Infinite Monkey Theorem) + +所以你要么放弃 GNU emacs,要么改变它让它使用更合理的设定。要采用后一个方案,你可 +以把下面这段粘贴到你的 .emacs 文件里。 + +(defun c-lineup-arglist-tabs-only (ignored) + "Line up argument lists by tabs, not spaces" + (let* ((anchor (c-langelem-pos c-syntactic-element)) + (column (c-langelem-2nd-pos c-syntactic-element)) + (offset (- (1+ column) anchor)) + (steps (floor offset c-basic-offset))) + (* (max steps 1) + c-basic-offset))) + +(add-hook 'c-mode-common-hook + (lambda () + ;; Add kernel style + (c-add-style + "linux-tabs-only" + '("linux" (c-offsets-alist + (arglist-cont-nonempty + c-lineup-gcc-asm-reg + c-lineup-arglist-tabs-only)))))) + +(add-hook 'c-mode-hook + (lambda () + (let ((filename (buffer-file-name))) + ;; Enable kernel mode for the appropriate files + (when (and filename + (string-match (expand-file-name "~/src/linux-trees") + filename)) + (setq indent-tabs-mode t) + (setq show-trailing-whitespace t) + (c-set-style "linux-tabs-only"))))) + +这会让 emacs 在 ~/src/linux-trees 目录下的 C 源文件获得更好的内核代码风格。 + +不过就算你尝试让 emacs 正确的格式化代码失败了,也并不意味着你失去了一切:还可以用 +“indent”。 + +不过,GNU indent 也有和 GNU emacs 一样有问题的设定,所以你需要给它一些命令选项。不 +过,这还不算太糟糕,因为就算是 GNU indent 的作者也认同 K&R 的权威性(GNU 的人并不是 +坏人,他们只是在这个问题上被严重的误导了),所以你只要给 indent 指定选项 “-kr -i8” +(代表 “K&R,8 个字符缩进”),或者使用 “scripts/Lindent”,这样就可以以最时髦的方式 缩进源代码。 -“indent”有很多选项,特别是重新格式化注释的时候,你可能需要看一下它的手册页。不过 -记住:“indent”不能修正坏的编程习惯。 +“indent” 有很多选项,特别是重新格式化注释的时候,你可能需要看一下它的手册页。不过 +记住:“indent” 不能修正坏的编程习惯。 - 第十章:Kconfig配置文件 + 第十章:Kconfig 配置文件 -对于遍布源码树的所有Kconfig*配置文件来说,它们缩进方式与C代码相比有所不同。紧挨 -在“config”定义下面的行缩进一个制表符,帮助信息则再多缩进2个空格。比如: +对于遍布源码树的所有 Kconfig* 配置文件来说,它们缩进方式与 C 代码相比有所不同。紧挨 +在 “config” 定义下面的行缩进一个制表符,帮助信息则再多缩进 2 个空格。比如: config AUDIT bool "Auditing support" @@ -470,7 +505,7 @@ config ADFS_FS_RW depends on ADFS_FS ... 
-要查看配置文件的完整文档,请看Documentation/kbuild/kconfig-language.txt。 +要查看配置文件的完整文档,请看 Documentation/kbuild/kconfig-language.txt。 第十一章:数据结构 @@ -489,11 +524,11 @@ config ADFS_FS_RW 很多数据结构实际上有2级引用计数,它们通常有不同“类”的用户。子类计数器统计子类用 户的数量,每当子类计数器减至零时,全局计数器减一。 -这种“多级引用计数”的例子可以在内存管理(“struct mm_struct”:mm_users和mm_count) +这种“多级引用计数”的例子可以在内存管理(“struct mm_struct”:mm_users 和 mm_count) 和文件系统(“struct super_block”:s_count和s_active)中找到。 记住:如果另一个执行线索可以找到你的数据结构,但是这个数据结构没有引用计数器,这 -里几乎肯定是一个bug。 +里几乎肯定是一个 bug。 第十二章:宏,枚举和RTL @@ -508,102 +543,128 @@ config ADFS_FS_RW 一般的,如果能写成内联函数就不要写成像函数的宏。 -含有多个语句的宏应该被包含在一个do-while代码块里: +含有多个语句的宏应该被包含在一个 do-while 代码块里: -#define macrofun(a, b, c) \ - do { \ - if (a == 5) \ - do_this(b, c); \ - } while (0) + #define macrofun(a, b, c) \ + do { \ + if (a == 5) \ + do_this(b, c); \ + } while (0) 使用宏的时候应避免的事情: 1) 影响控制流程的宏: -#define FOO(x) \ - do { \ - if (blah(x) < 0) \ - return -EBUGGERED; \ - } while(0) + #define FOO(x) \ + do { \ + if (blah(x) < 0) \ + return -EBUGGERED; \ + } while (0) 非常不好。它看起来像一个函数,不过却能导致“调用”它的函数退出;不要打乱读者大脑里 的语法分析器。 2) 依赖于一个固定名字的本地变量的宏: -#define FOO(val) bar(index, val) + #define FOO(val) bar(index, val) 可能看起来像是个不错的东西,不过它非常容易把读代码的人搞糊涂,而且容易导致看起来 不相关的改动带来错误。 -3) 作为左值的带参数的宏: FOO(x) = y;如果有人把FOO变成一个内联函数的话,这种用 +3) 作为左值的带参数的宏: FOO(x) = y;如果有人把 FOO 变成一个内联函数的话,这种用 法就会出错了。 4) 忘记了优先级:使用表达式定义常量的宏必须将表达式置于一对小括号之内。带参数的 宏也要注意此类问题。 -#define CONSTANT 0x4000 -#define CONSTEXP (CONSTANT | 3) + #define CONSTANT 0x4000 + #define CONSTEXP (CONSTANT | 3) + +5) 在宏里定义类似函数的本地变量时命名冲突: -cpp手册对宏的讲解很详细。Gcc internals手册也详细讲解了RTL(译注:register + #define FOO(x) \ + ({ \ + typeof(x) ret; \ + ret = calc_ret(x); \ + (ret); \ + }) + +ret 是本地变量的通用名字 - __foo_ret 更不容易与一个已存在的变量冲突。 + +cpp 手册对宏的讲解很详细。gcc internals 手册也详细讲解了 RTL(译注:register transfer language),内核里的汇编语言经常用到它。 第十三章:打印内核消息 内核开发者应该是受过良好教育的。请一定注意内核信息的拼写,以给人以好的印象。不要 -用不规范的单词比如“dont”,而要用“do not”或者“don't”。保证这些信息简单、明了、无 -歧义。 +用不规范的单词比如 “dont”,而要用 “do not”或者 “don't”。保证这些信息简单、明了、 +无歧义。 内核信息不必以句号(译注:英文句号,即点)结束。 -在小括号里打印数字(%d)没有任何价值,应该避免这样做。 +在小括号里打印数字 (%d) 没有任何价值,应该避免这样做。 -里有一些驱动模型诊断宏,你应该使用它们,以确保信息对应于正确的 -设备和驱动,并且被标记了正确的消息级别。这些宏有:dev_err(), dev_warn(), -dev_info()等等。对于那些不和某个特定设备相关连的信息,定义了 -pr_debug()和pr_info()。 + 里有一些驱动模型诊断宏,你应该使用它们,以确保信息对应于正确的 +设备和驱动,并且被标记了正确的消息级别。这些宏有:dev_err(),dev_warn(), +dev_info() 等等。对于那些不和某个特定设备相关连的信息, 定义了 +pr_notice(),pr_info(),pr_warn(),pr_err() 和其他。 -写出好的调试信息可以是一个很大的挑战;当你写出来之后,这些信息在远程除错的时候 -就会成为极大的帮助。当DEBUG符号没有被定义的时候,这些信息不应该被编译进内核里 -(也就是说,默认地,它们不应该被包含在内)。如果你使用dev_dbg()或者pr_debug(), -就能自动达到这个效果。很多子系统拥有Kconfig选项来启用-DDEBUG。还有一个相关的惯例 -是使用VERBOSE_DEBUG来添加dev_vdbg()消息到那些已经由DEBUG启用的消息之上。 +写出好的调试信息可以是一个很大的挑战;一旦你写出后,这些信息在远程除错时能提供极大 +的帮助。然而打印调试信息的处理方式同打印非调试信息不同。其他 pr_XXX() 函数能无条件地 +打印,pr_debug() 却不;默认情况下它不会被编译,除非定义了 DEBUG 或设定了 +CONFIG_DYNAMIC_DEBUG。实际这同样是为了 dev_dbg(),一个相关约定是在一个已经开启了 +DEBUG 时,使用 VERBOSE_DEBUG 来添加 dev_vdbg()。 + +许多子系统拥有 Kconfig 调试选项来开启 -DDEBUG 在对应的 Makefile 里面;在其他 +情况下,特殊文件使用 #define DEBUG。当一条调试信息需要被无条件打印时,例如,如果 +已经包含一个调试相关的 #ifdef 条件,printk(KERN_DEBUG ...) 
就可被使用。 第十四章:分配内存 -内核提供了下面的一般用途的内存分配函数:kmalloc(),kzalloc(),kcalloc()和 -vmalloc()。请参考API文档以获取有关它们的详细信息。 +内核提供了下面的一般用途的内存分配函数: +kmalloc(),kzalloc(),kmalloc_array(),kcalloc(),vmalloc() 和 vzalloc()。 +请参考 API 文档以获取有关它们的详细信息。 传递结构体大小的首选形式是这样的: p = kmalloc(sizeof(*p), ...); -另外一种传递方式中,sizeof的操作数是结构体的名字,这样会降低可读性,并且可能会引 -入bug。有可能指针变量类型被改变时,而对应的传递给内存分配函数的sizeof的结果不变。 +另外一种传递方式中,sizeof 的操作数是结构体的名字,这样会降低可读性,并且可能会引 +入 bug。有可能指针变量类型被改变时,而对应的传递给内存分配函数的 sizeof 的结果不变。 -强制转换一个void指针返回值是多余的。C语言本身保证了从void指针到其他任何指针类型 +强制转换一个 void 指针返回值是多余的。C 语言本身保证了从 void 指针到其他任何指针类型 的转换是没有问题的。 +分配一个数组的首选形式是这样的: + + p = kmalloc_array(n, sizeof(...), ...); + +分配一个零长数组的首选形式是这样的: + + p = kcalloc(n, sizeof(...), ...); + +两种形式检查分配大小 n * sizeof(...) 的溢出,如果溢出返回 NULL。 + 第十五章:内联弊病 -有一个常见的误解是内联函数是gcc提供的可以让代码运行更快的一个选项。虽然使用内联 +有一个常见的误解是内联函数是 gcc 提供的可以让代码运行更快的一个选项。虽然使用内联 函数有时候是恰当的(比如作为一种替代宏的方式,请看第十二章),不过很多情况下不是 -这样。inline关键字的过度使用会使内核变大,从而使整个系统运行速度变慢。因为大内核 +这样。inline 关键字的过度使用会使内核变大,从而使整个系统运行速度变慢。因为大内核 会占用更多的指令高速缓存(译注:一级缓存通常是指令缓存和数据缓存分开的)而且会导 -致pagecache的可用内存减少。想象一下,一次pagecache未命中就会导致一次磁盘寻址,将 -耗时5毫秒。5毫秒的时间内CPU能执行很多很多指令。 +致 pagecache 的可用内存减少。想象一下,一次pagecache未命中就会导致一次磁盘寻址, +将耗时 5 毫秒。5 毫秒的时间内 CPU 能执行很多很多指令。 -一个基本的原则是如果一个函数有3行以上,就不要把它变成内联函数。这个原则的一个例 +一个基本的原则是如果一个函数有 3 行以上,就不要把它变成内联函数。这个原则的一个例 外是,如果你知道某个参数是一个编译时常量,而且因为这个常量你确定编译器在编译时能 -优化掉你的函数的大部分代码,那仍然可以给它加上inline关键字。kmalloc()内联函数就 +优化掉你的函数的大部分代码,那仍然可以给它加上 inline 关键字。kmalloc() 内联函数就 是一个很好的例子。 -人们经常主张给static的而且只用了一次的函数加上inline,如此不会有任何损失,因为没 -有什么好权衡的。虽然从技术上说这是正确的,但是实际上这种情况下即使不加inline gcc -也可以自动使其内联。而且其他用户可能会要求移除inline,由此而来的争论会抵消inline +人们经常主张给 static 的而且只用了一次的函数加上 inline,如此不会有任何损失,因为没 +有什么好权衡的。虽然从技术上说这是正确的,但是实际上这种情况下即使不加 inline gcc +也可以自动使其内联。而且其他用户可能会要求移除 inline,由此而来的争论会抵消 inline 自身的潜在价值,得不偿失。 @@ -613,37 +674,37 @@ vmalloc()。请参考API文档以获取有关它们的详细信息。 的一个值可以表示为一个错误代码整数(-Exxx=失败,0=成功)或者一个“成功”布尔值( 0=失败,非0=成功)。 -混合使用这两种表达方式是难于发现的bug的来源。如果C语言本身严格区分整形和布尔型变 -量,那么编译器就能够帮我们发现这些错误……不过C语言不区分。为了避免产生这种bug,请 +混合使用这两种表达方式是难于发现的 bug 的来源。如果 C 语言本身严格区分整形和布尔型变 +量,那么编译器就能够帮我们发现这些错误……不过 C 语言不区分。为了避免产生这种 bug,请 遵循下面的惯例: 如果函数的名字是一个动作或者强制性的命令,那么这个函数应该返回错误代码整 数。如果是一个判断,那么函数应该返回一个“成功”布尔值。 -比如,“add work”是一个命令,所以add_work()函数在成功时返回0,在失败时返回-EBUSY。 -类似的,因为“PCI device present”是一个判断,所以pci_dev_present()函数在成功找到 -一个匹配的设备时应该返回1,如果找不到时应该返回0。 +比如,“add work” 是一个命令,所以 add_work() 函数在成功时返回 0,在失败时返回 -EBUSY。 +类似的,因为 “PCI device present” 是一个判断,所以 pci_dev_present() 函数在成功找到 +一个匹配的设备时应该返回 1,如果找不到时应该返回 0。 所有导出(译注:EXPORT)的函数都必须遵守这个惯例,所有的公共函数也都应该如此。私 有(static)函数不需要如此,但是我们也推荐这样做。 返回值是实际计算结果而不是计算是否成功的标志的函数不受此惯例的限制。一般的,他们 通过返回一些正常值范围之外的结果来表示出错。典型的例子是返回指针的函数,他们使用 -NULL或者ERR_PTR机制来报告错误。 +NULL 或者 ERR_PTR 机制来报告错误。 第十七章:不要重新发明内核宏 -头文件include/linux/kernel.h包含了一些宏,你应该使用它们,而不要自己写一些它们的 +头文件 include/linux/kernel.h 包含了一些宏,你应该使用它们,而不要自己写一些它们的 变种。比如,如果你需要计算一个数组的长度,使用这个宏 - #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 类似的,如果你要计算某结构体成员的大小,使用 - #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) + #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) -还有可以做严格的类型检查的min()和max()宏,如果你需要可以使用它们。你可以自己看看 +还有可以做严格的类型检查的 min() 和 max() 宏,如果你需要可以使用它们。你可以自己看看 那个头文件里还定义了什么你可以拿来用的东西,如果有定义的话,你就不应在你的代码里 自己重新定义。 @@ -653,42 +714,100 @@ NULL或者ERR_PTR机制来报告错误。 有一些编辑器可以解释嵌入在源文件里的由一些特殊标记标明的配置信息。比如,emacs 能够解释被标记成这样的行: --*- mode: c -*- + -*- mode: c -*- 或者这样的: -/* -Local Variables: -compile-command: "gcc -DMAGIC_DEBUG_FLAG foo.c" -End: -*/ + /* + Local Variables: + compile-command: "gcc -DMAGIC_DEBUG_FLAG foo.c" + End: + */ -Vim能够解释这样的标记: +Vim 能够解释这样的标记: -/* vim:set sw=8 noet */ + /* vim:set sw=8 noet */ 不要在源代码中包含任何这样的内容。每个人都有他自己的编辑器配置,你的源文件不应 
该覆盖别人的配置。这包括有关缩进和模式配置的标记。人们可以使用他们自己定制的模 式,或者使用其他可以产生正确的缩进的巧妙方法。 + 第十九章:内联汇编 + +在特定架构的代码中,你也许需要内联汇编来使用 CPU 接口和平台相关功能。在需要 +这么做时,不要犹豫。然而,当 C 可以完成工作时,不要无端地使用内联汇编。如果 +可能,你可以并且应该用 C 和硬件交互。 + +考虑去写通用一点的内联汇编作为简明的辅助函数,而不是重复写下它们的细节。记住 +内联汇编可以使用 C 参数。 + +大而特殊的汇编函数应该放在 .S 文件中,对应 C 的原型定义在 C 头文件中。汇编 +函数的 C 原型应该使用 “asmlinkage”。 + +你可能需要将你的汇编语句标记为 volatile,来阻止 GCC 在没发现任何副作用后就 +移除了它。你不必总是这样做,虽然,这样可以限制不必要的优化。 + +在写一个包含多条指令的单个内联汇编语句时,把每条指令用引号字符串分离,并写在 +单独一行,在每个字符串结尾,除了 \n\t 结尾之外,在汇编输出中适当地缩进下 +一条指令: + + asm ("magic %reg1, #42\n\t" + "more_magic %reg2, %reg3" + : /* outputs */ : /* inputs */ : /* clobbers */); + + + 第二十章:条件编译 + +只要可能,就不要在 .c 文件里面使用预处理条件;这样做让代码更难阅读并且逻辑难以 +跟踪。替代方案是,在头文件定义函数在这些 .c 文件中使用这类的条件表达式,提供空 +操作的桩版本(译注:桩程序,是指用来替换一部分功能的程序段)在 #else 情况下, +再从 .c 文件中无条件地调用这些函数。编译器会避免生成任何桩调用的代码,产生一致 +的结果,但逻辑将更加清晰。 + +宁可编译整个函数,而不是部分函数或部分表达式。而不是在一个表达式添加 ifdef, +解析部分或全部表达式到一个单独的辅助函数,并应用条件到该函数内。 + +如果你有一个在特定配置中可能是未使用的函数或变量,编译器将警告它定义了但未使用, +标记这个定义为 __maybe_unused 而不是将它包含在一个预处理条件中。(然而,如果 +一个函数或变量总是未使用的,就直接删除它。) + +在代码中,可能的情况下,使用 IS_ENABLED 宏来转化某个 Kconfig 标记为 C 的布尔 +表达式,并在正常的 C 条件中使用它: + + if (IS_ENABLED(CONFIG_SOMETHING)) { + ... + } + +编译器会无条件地做常数合并,就像使用 #ifdef 那样,包含或排除代码块,所以这不会 +带来任何运行时开销。然而,这种方法依旧允许 C 编译器查看块内的代码,并检查它的正确 +性(语法,类型,符号引用,等等)。因此,如果条件不满足,代码块内的引用符号将不存在, +你必须继续使用 #ifdef。 + +在任何有意义的 #if 或 #ifdef 块的末尾(超过几行),在 #endif 同一行的后面写下 +注释,指出该条件表达式被使用。例如: + + #ifdef CONFIG_SOMETHING + ... + #endif /* CONFIG_SOMETHING */ + 附录 I:参考 -The C Programming Language, 第二版, 作者Brian W. Kernighan和Denni -M. Ritchie. Prentice Hall, Inc., 1988. ISBN 0-13-110362-8 (软皮), -0-13-110370-9 (硬皮). URL: http://cm.bell-labs.com/cm/cs/cbook/ +The C Programming Language, 第二版 +作者:Brian W. Kernighan 和 Denni M. Ritchie. +Prentice Hall, Inc., 1988. +ISBN 0-13-110362-8 (软皮), 0-13-110370-9 (硬皮). -The Practice of Programming 作者Brian W. Kernighan和Rob Pike. Addison-Wesley, -Inc., 1999. ISBN 0-201-61586-X. URL: http://cm.bell-labs.com/cm/cs/tpop/ +The Practice of Programming +作者:Brian W. Kernighan 和 Rob Pike. +Addison-Wesley, Inc., 1999. +ISBN 0-201-61586-X. 
-cpp,gcc,gcc internals和indent的GNU手册——和K&R及本文相符合的部分,全部可以在
-http://www.gnu.org/manual/找到
+GNU 手册 - 遵循 K&R 标准和此文本 - cpp, gcc, gcc internals and indent,
+都可以从 http://www.gnu.org/manual/ 找到

WG14是C语言的国际标准化工作组,URL: http://www.open-std.org/JTC1/SC22/WG14/

Kernel CodingStyle,作者 greg@kroah.com 发表于OLS 2002:
http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
-
---
-最后更新于2007年7月13日。

diff --git a/MAINTAINERS b/MAINTAINERS
index 419763e3b52e..3a7b3998e845 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -288,6 +288,7 @@ F:	include/linux/acpi.h
F:	include/acpi/
F:	Documentation/acpi/
F:	Documentation/ABI/testing/sysfs-bus-acpi
+F:	Documentation/ABI/testing/configfs-acpi
F:	drivers/pci/*acpi*
F:	drivers/pci/*/*acpi*
F:	drivers/pci/*/*/*acpi*
@@ -595,6 +596,10 @@ S:	Odd Fixes
L:	linux-alpha@vger.kernel.org
F:	arch/alpha/

+ALPS PS/2 TOUCHPAD DRIVER
+R:	Pali Rohár
+F:	drivers/input/mouse/alps.*
+
ALTERA MAILBOX DRIVER
M:	Ley Foon Tan
L:	nios2-dev@lists.rocketboards.org (moderated for non-subscribers)
@@ -835,7 +840,9 @@ M:	Iyappan Subramanian
M:	Keyur Chudgar
S:	Supported
F:	drivers/net/ethernet/apm/xgene/
+F:	drivers/net/phy/mdio-xgene.c
F:	Documentation/devicetree/bindings/net/apm-xgene-enet.txt
+F:	Documentation/devicetree/bindings/net/apm-xgene-mdio.txt

APTINA CAMERA SENSOR PLL
M:	Laurent Pinchart
@@ -1643,6 +1650,13 @@ L:	linux-media@vger.kernel.org
S:	Maintained
F:	drivers/media/platform/s5p-tv/

+ARM/SAMSUNG S5P SERIES HDMI CEC SUBSYSTEM SUPPORT
+M:	Kyungmin Park
+L:	linux-arm-kernel@lists.infradead.org
+L:	linux-media@vger.kernel.org
+S:	Maintained
+F:	drivers/staging/media/platform/s5p-cec/
+
ARM/SAMSUNG S5P SERIES JPEG CODEC SUPPORT
M:	Andrzej Pietrasiewicz
M:	Jacek Anaszewski
@@ -1665,7 +1679,6 @@ F:	arch/arm/boot/dts/sh*
F:	arch/arm/configs/shmobile_defconfig
F:	arch/arm/include/debug/renesas-scif.S
F:	arch/arm/mach-shmobile/
-F:	drivers/sh/
F:	drivers/soc/renesas/
F:	include/linux/soc/renesas/

@@ -1690,8 +1703,6 @@ S:	Maintained
F:	drivers/edac/altera_edac.
ARM/STI ARCHITECTURE -M: Srinivas Kandagatla -M: Maxime Coquelin M: Patrice Chotard L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kernel@stlinux.com @@ -1724,6 +1735,7 @@ F: drivers/ata/ahci_st.c ARM/STM32 ARCHITECTURE M: Maxime Coquelin +M: Alexandre Torgue L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/mcoquelin/stm32.git @@ -2293,6 +2305,7 @@ S: Maintained F: Documentation/ABI/testing/sysfs-class-net-batman-adv F: Documentation/ABI/testing/sysfs-class-net-mesh F: Documentation/networking/batman-adv.txt +F: include/uapi/linux/batman_adv.h F: net/batman-adv/ BAYCOM/HDLCDRV DRIVERS FOR AX.25 @@ -2456,6 +2469,14 @@ L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/b44.* +BROADCOM B53 ETHERNET SWITCH DRIVER +M: Florian Fainelli +L: netdev@vger.kernel.org +L: openwrt-devel@lists.openwrt.org (subscribers-only) +S: Supported +F: drivers/net/dsa/b53/* +F: include/linux/platform_data/b53.h + BROADCOM GENET ETHERNET DRIVER M: Florian Fainelli L: netdev@vger.kernel.org @@ -2572,12 +2593,11 @@ S: Supported F: drivers/net/ethernet/broadcom/tg3.* BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER -M: Brett Rudley -M: Arend van Spriel -M: Franky (Zhenhui) Lin -M: Hante Meuleman +M: Arend van Spriel +M: Franky Lin +M: Hante Meuleman L: linux-wireless@vger.kernel.org -L: brcm80211-dev-list@broadcom.com +L: brcm80211-dev-list.pdl@broadcom.com S: Supported F: drivers/net/wireless/broadcom/brcm80211/ @@ -2776,9 +2796,9 @@ F: include/net/caif/ F: net/caif/ CALGARY x86-64 IOMMU -M: Muli Ben-Yehuda -M: "Jon D. Mason" -L: discuss@x86-64.org +M: Muli Ben-Yehuda +M: Jon Mason +L: iommu@lists.linux-foundation.org S: Maintained F: arch/x86/kernel/pci-calgary_64.c F: arch/x86/kernel/tce_64.c @@ -2809,6 +2829,7 @@ W: https://github.com/linux-can T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git S: Maintained +F: Documentation/devicetree/bindings/net/can/ F: drivers/net/can/ F: include/linux/can/dev.h F: include/linux/can/platform/ @@ -2848,6 +2869,22 @@ F: drivers/net/ieee802154/cc2520.c F: include/linux/spi/cc2520.h F: Documentation/devicetree/bindings/net/ieee802154/cc2520.txt +CEC DRIVER +M: Hans Verkuil +L: linux-media@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +W: http://linuxtv.org +S: Supported +F: Documentation/cec.txt +F: Documentation/DocBook/media/v4l/cec* +F: drivers/staging/media/cec/ +F: drivers/media/cec-edid.c +F: drivers/media/rc/keymaps/rc-cec.c +F: include/media/cec.h +F: include/media/cec-edid.h +F: include/linux/cec.h +F: include/linux/cec-funcs.h + CELL BROADBAND ENGINE ARCHITECTURE M: Arnd Bergmann L: linuxppc-dev@lists.ozlabs.org @@ -3284,6 +3321,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6.git S: Maintained F: Documentation/crypto/ +F: Documentation/devicetree/bindings/crypto/ F: Documentation/DocBook/crypto-API.tmpl F: arch/*/crypto/ F: crypto/ @@ -4473,7 +4511,7 @@ S: Orphan F: fs/efs/ EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER -M: Thadeu Lima de Souza Cascardo +M: Douglas Miller L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/ibm/ehea/ @@ -4883,6 +4921,13 @@ F: drivers/net/ethernet/freescale/gianfar* X: drivers/net/ethernet/freescale/gianfar_ptp.c F: Documentation/devicetree/bindings/net/fsl-tsec-phy.txt 
+FREESCALE QUICC ENGINE UCC HDLC DRIVER +M: Zhao Qiang +L: netdev@vger.kernel.org +L: linuxppc-dev@lists.ozlabs.org +S: Maintained +F: drivers/net/wan/fsl_ucc_hdlc* + FREESCALE QUICC ENGINE UCC UART DRIVER M: Timur Tabi L: linuxppc-dev@lists.ozlabs.org @@ -4938,6 +4983,13 @@ F: Documentation/filesystems/caching/ F: fs/fscache/ F: include/linux/fscache*.h +FS-CRYPTO: FILE SYSTEM LEVEL ENCRYPTION SUPPORT +M: Theodore Y. Ts'o +M: Jaegeuk Kim +S: Supported +F: fs/crypto/ +F: include/linux/fscrypto.h + F2FS FILE SYSTEM M: Jaegeuk Kim M: Changman Lee @@ -5166,10 +5218,10 @@ S: Maintained F: drivers/media/usb/gspca/m5602/ GSPCA PAC207 SONIXB SUBDRIVER -M: Hans de Goede +M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -S: Maintained +S: Odd Fixes F: drivers/media/usb/gspca/pac207.c GSPCA SN9C20X SUBDRIVER @@ -5187,10 +5239,10 @@ S: Maintained F: drivers/media/usb/gspca/t613.c GSPCA USB WEBCAM DRIVER -M: Hans de Goede +M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -S: Maintained +S: Odd Fixes F: drivers/media/usb/gspca/ GUID PARTITION TABLE (GPT) @@ -5271,6 +5323,7 @@ M: Matt Mackall M: Herbert Xu L: linux-crypto@vger.kernel.org S: Odd fixes +F: Documentation/devicetree/bindings/rng/ F: Documentation/hw_random.txt F: drivers/char/hw_random/ F: include/linux/hw_random.h @@ -5418,6 +5471,15 @@ F: include/uapi/linux/if_hippi.h F: net/802/hippi.c F: drivers/net/hippi/ +HISILICON NETWORK SUBSYSTEM DRIVER +M: Yisen Zhuang +M: Salil Mehta +L: netdev@vger.kernel.org +W: http://www.hisilicon.com +S: Maintained +F: drivers/net/ethernet/hisilicon/ +F: Documentation/devicetree/bindings/net/hisilicon*.txt + HISILICON SAS Controller M: John Garry W: http://www.hisilicon.com @@ -5785,7 +5847,9 @@ R: Hartmut Knaack R: Lars-Peter Clausen R: Peter Meerwald-Stadler L: linux-iio@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio.git S: Maintained +F: Documentation/devicetree/bindings/iio/ F: drivers/iio/ F: drivers/staging/iio/ F: include/linux/iio/ @@ -5915,6 +5979,12 @@ L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/intel-hid.c +INTEL VIRTUAL BUTTON DRIVER +M: AceLan Kao +L: platform-driver-x86@vger.kernel.org +S: Maintained +F: drivers/platform/x86/intel-vbtn.c + INTEL IDLE DRIVER M: Len Brown L: linux-pm@vger.kernel.org @@ -6017,6 +6087,12 @@ L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/i40iw/ +INTEL MERRIFIELD GPIO DRIVER +M: Andy Shevchenko +L: linux-gpio@vger.kernel.org +S: Maintained +F: drivers/gpio/gpio-merrifield.c + INTEL-MID GPIO DRIVER M: David Cohen L: linux-gpio@vger.kernel.org @@ -6231,7 +6307,6 @@ F: Documentation/devicetree/bindings/interrupt-controller/ F: drivers/irqchip/ IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY) -M: Jiang Liu M: Marc Zyngier S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core @@ -6764,6 +6839,7 @@ S: Maintained F: drivers/ata/ F: include/linux/ata.h F: include/linux/libata.h +F: Documentation/devicetree/bindings/ata/ LIBATA PATA ARASAN COMPACT FLASH CONTROLLER M: Viresh Kumar @@ -6968,7 +7044,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/livepatching.git LINUX KERNEL DUMP TEST MODULE (LKDTM) M: Kees Cook S: Maintained -F: drivers/misc/lkdtm.c +F: drivers/misc/lkdtm* LLC (802.2) M: Arnaldo Carvalho de Melo @@ -7021,15 +7097,23 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ S: Maintained F: drivers/media/usb/dvb-usb-v2/lmedm04* -LOCKDEP AND LOCKSTAT 
+LOCKING PRIMITIVES M: Peter Zijlstra M: Ingo Molnar L: linux-kernel@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/locking +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core S: Maintained -F: Documentation/locking/lockdep*.txt -F: Documentation/locking/lockstat.txt +F: Documentation/locking/ F: include/linux/lockdep.h +F: include/linux/spinlock*.h +F: arch/*/include/asm/spinlock*.h +F: include/linux/rwlock*.h +F: include/linux/mutex*.h +F: arch/*/include/asm/mutex*.h +F: include/linux/rwsem*.h +F: arch/*/include/asm/rwsem.h +F: include/linux/seqlock.h +F: lib/locking*.[ch] F: kernel/locking/ LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks) @@ -7170,6 +7254,12 @@ W: http://www.kernel.org/doc/man-pages L: linux-man@vger.kernel.org S: Maintained +MARVELL 88E6XXX ETHERNET SWITCH FABRIC DRIVER +M: Andrew Lunn +M: Vivien Didelot +S: Maintained +F: drivers/net/dsa/mv88e6xxx/ + MARVELL ARMADA DRM SUPPORT M: Russell King S: Maintained @@ -7177,11 +7267,6 @@ F: drivers/gpu/drm/armada/ F: include/uapi/drm/armada_drm.h F: Documentation/devicetree/bindings/display/armada/ -MARVELL 88E6352 DSA support -M: Guenter Roeck -S: Maintained -F: drivers/net/dsa/mv88e6352.c - MARVELL CRYPTO DRIVER M: Boris Brezillon M: Arnaud Ebalard @@ -7317,6 +7402,16 @@ L: linux-iio@vger.kernel.org S: Maintained F: drivers/iio/potentiometer/mcp4531.c +MEDIA DRIVERS FOR RENESAS - FCP +M: Laurent Pinchart +L: linux-media@vger.kernel.org +L: linux-renesas-soc@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Supported +F: Documentation/devicetree/bindings/media/renesas,fcp.txt +F: drivers/media/platform/rcar-fcp.c +F: include/media/rcar-fcp.h + MEDIA DRIVERS FOR RENESAS - VSP1 M: Laurent Pinchart L: linux-media@vger.kernel.org @@ -7326,8 +7421,18 @@ S: Supported F: Documentation/devicetree/bindings/media/renesas,vsp1.txt F: drivers/media/platform/vsp1/ +MEDIA DRIVERS FOR HELENE +M: Abylay Ospan +L: linux-media@vger.kernel.org +W: https://linuxtv.org +W: http://netup.tv/ +T: git git://linuxtv.org/media_tree.git +S: Supported +F: drivers/media/dvb-frontends/helene* + MEDIA DRIVERS FOR ASCOT2E M: Sergey Kozlov +M: Abylay Ospan L: linux-media@vger.kernel.org W: https://linuxtv.org W: http://netup.tv/ @@ -7337,6 +7442,7 @@ F: drivers/media/dvb-frontends/ascot2e* MEDIA DRIVERS FOR CXD2841ER M: Sergey Kozlov +M: Abylay Ospan L: linux-media@vger.kernel.org W: https://linuxtv.org W: http://netup.tv/ @@ -7346,6 +7452,7 @@ F: drivers/media/dvb-frontends/cxd2841er* MEDIA DRIVERS FOR HORUS3A M: Sergey Kozlov +M: Abylay Ospan L: linux-media@vger.kernel.org W: https://linuxtv.org W: http://netup.tv/ @@ -7355,6 +7462,7 @@ F: drivers/media/dvb-frontends/horus3a* MEDIA DRIVERS FOR LNBH25 M: Sergey Kozlov +M: Abylay Ospan L: linux-media@vger.kernel.org W: https://linuxtv.org W: http://netup.tv/ @@ -7364,6 +7472,7 @@ F: drivers/media/dvb-frontends/lnbh25* MEDIA DRIVERS FOR NETUP PCI UNIVERSAL DVB devices M: Sergey Kozlov +M: Abylay Ospan L: linux-media@vger.kernel.org W: https://linuxtv.org W: http://netup.tv/ @@ -7421,7 +7530,7 @@ F: drivers/scsi/megaraid.* F: drivers/scsi/megaraid/ MELLANOX ETHERNET DRIVER (mlx4_en) -M: Eugenia Emantayev +M: Tariq Toukan L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com @@ -7473,6 +7582,7 @@ Q: http://patchwork.ozlabs.org/project/linux-mtd/list/ T: git git://git.infradead.org/linux-mtd.git T: git git://git.infradead.org/l2-mtd.git S: Maintained +F: Documentation/devicetree/bindings/mtd/ F: 
drivers/mtd/ F: include/linux/mtd/ F: include/uapi/mtd/ @@ -7612,10 +7722,8 @@ L: linux-media@vger.kernel.org W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git S: Maintained -F: drivers/staging/media/mn88472/ -F: drivers/media/dvb-frontends/mn88472.h +F: drivers/media/dvb-frontends/mn88472* MN88473 MEDIA DRIVER M: Antti Palosaari @@ -8170,6 +8278,13 @@ S: Supported F: drivers/nvme/host/ F: include/linux/nvme.h +NVM EXPRESS TARGET DRIVER +M: Christoph Hellwig +M: Sagi Grimberg +L: linux-nvme@lists.infradead.org +S: Supported +F: drivers/nvme/target/ + NVMEM FRAMEWORK M: Srinivas Kandagatla M: Maxime Ripard @@ -8960,6 +9075,7 @@ L: linux-gpio@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git S: Maintained F: Documentation/devicetree/bindings/pinctrl/ +F: Documentation/pinctrl.txt F: drivers/pinctrl/ F: include/linux/pinctrl/ @@ -9082,6 +9198,8 @@ M: David Woodhouse L: linux-pm@vger.kernel.org T: git git://git.infradead.org/battery-2.6.git S: Maintained +F: Documentation/devicetree/bindings/power/ +F: Documentation/devicetree/bindings/power_supply/ F: include/linux/power_supply.h F: drivers/power/ X: drivers/power/avs/ @@ -9219,6 +9337,13 @@ F: include/linux/tracehook.h F: include/uapi/linux/ptrace.h F: kernel/ptrace.c +PULSE8-CEC DRIVER +M: Hans Verkuil +L: linux-media@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: drivers/staging/media/pulse8-cec + PVRUSB2 VIDEO4LINUX DRIVER M: Mike Isely L: pvrusb2@isely.net (subscribers-only) @@ -9230,10 +9355,10 @@ F: Documentation/video4linux/README.pvrusb2 F: drivers/media/usb/pvrusb2/ PWC WEBCAM DRIVER -M: Hans de Goede +M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -S: Maintained +S: Odd Fixes F: drivers/media/usb/pwc/* PWM FAN DRIVER @@ -9306,7 +9431,8 @@ L: rtc-linux@googlegroups.com S: Maintained QAT DRIVER -M: Tadeusz Struk +M: Giovanni Cabiddu +M: Salvatore Benedetto L: qat-linux@intel.com S: Supported F: drivers/crypto/qat/ @@ -9448,14 +9574,14 @@ F: drivers/video/fbdev/aty/radeon* F: include/uapi/linux/radeonfb.h RADIOSHARK RADIO DRIVER -M: Hans de Goede +M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git S: Maintained F: drivers/media/radio/radio-shark.c RADIOSHARK2 RADIO DRIVER -M: Hans de Goede +M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git S: Maintained @@ -9529,7 +9655,7 @@ M: Florian Fainelli S: Maintained RDC R6040 FAST ETHERNET DRIVER -M: Florian Fainelli +M: Florian Fainelli L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/rdc/r6040.c @@ -9596,6 +9722,7 @@ M: Mark Brown L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap.git S: Supported +F: Documentation/devicetree/bindings/regmap/ F: drivers/base/regmap/ F: include/linux/regmap.h @@ -9680,7 +9807,6 @@ F: Documentation/ABI/*/sysfs-driver-hid-roccat* ROCKER DRIVER M: Jiri Pirko -M: Scott Feldman L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/rocker/ @@ -9997,6 +10123,7 @@ SERIAL DRIVERS M: Greg Kroah-Hartman L: linux-serial@vger.kernel.org S: Maintained +F: Documentation/devicetree/bindings/serial/ F: drivers/tty/serial/ SYNOPSYS DESIGNWARE DMAC DRIVER @@ -10277,10 +10404,9 @@ W: http://www.avagotech.com S: Supported F: drivers/scsi/be2iscsi/ -Emulex 10Gbps NIC BE2, BE3-R, Lancer, Skyhawk-R DRIVER +Emulex 10Gbps 
NIC BE2, BE3-R, Lancer, Skyhawk-R DRIVER (be2net) M: Sathya Perla M: Ajit Khaparde -M: Padmanabh Ratnakar M: Sriharsha Basavapatna M: Somnath Kotur L: netdev@vger.kernel.org @@ -10857,6 +10983,7 @@ STAGING - INDUSTRIAL IO M: Jonathan Cameron L: linux-iio@vger.kernel.org S: Odd Fixes +F: Documentation/devicetree/bindings/staging/iio/ F: drivers/staging/iio/ STAGING - LIRC (LINUX INFRARED REMOTE CONTROL) DRIVERS diff --git a/Makefile b/Makefile index 4fb6beac5f09..d384848478b9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = NAME = Psychotic Stoned Sheep # *DOCUMENTATION* @@ -363,12 +363,14 @@ CHECK = sparse CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void $(CF) +NOSTDINC_FLAGS = CFLAGS_MODULE = AFLAGS_MODULE = LDFLAGS_MODULE = CFLAGS_KERNEL = AFLAGS_KERNEL = -CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized +LDFLAGS_vmlinux = +CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im CFLAGS_KCOV = -fsanitize-coverage=trace-pc @@ -617,12 +619,13 @@ ARCH_CFLAGS := include arch/$(SRCARCH)/Makefile KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) +KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE -KBUILD_CFLAGS += -Os $(call cc-disable-warning,maybe-uninitialized,) +KBUILD_CFLAGS += -Os else ifdef CONFIG_PROFILE_ALL_BRANCHES -KBUILD_CFLAGS += -O2 $(call cc-disable-warning,maybe-uninitialized,) +KBUILD_CFLAGS += -O2 else KBUILD_CFLAGS += -O2 endif @@ -645,41 +648,28 @@ ifneq ($(CONFIG_FRAME_WARN),0) KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN}) endif -# Handle stack protector mode. -# -# Since kbuild can potentially perform two passes (first with the old -# .config values and then with updated .config values), we cannot error out -# if a desired compiler option is unsupported. If we were to error, kbuild -# could never get to the second pass and actually notice that we changed -# the option to something that was supported. -# -# Additionally, we don't want to fallback and/or silently change which compiler -# flags will be used, since that leads to producing kernels with different -# security feature characteristics depending on the compiler used. ("But I -# selected CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!") -# -# The middle ground is to warn here so that the failed option is obvious, but -# to let the build fail with bad compiler flags so that we can't produce a -# kernel when there is a CONFIG and compiler mismatch. -# +# This selects the stack protector compiler flag. Testing it is delayed +# until after .config has been reprocessed, in the prepare-compiler-check +# target. ifdef CONFIG_CC_STACKPROTECTOR_REGULAR stackp-flag := -fstack-protector - ifeq ($(call cc-option, $(stackp-flag)),) - $(warning Cannot use CONFIG_CC_STACKPROTECTOR_REGULAR: \ - -fstack-protector not supported by compiler) - endif + stackp-name := REGULAR else ifdef CONFIG_CC_STACKPROTECTOR_STRONG stackp-flag := -fstack-protector-strong - ifeq ($(call cc-option, $(stackp-flag)),) - $(warning Cannot use CONFIG_CC_STACKPROTECTOR_STRONG: \ - -fstack-protector-strong not supported by compiler) - endif + stackp-name := STRONG else # Force off for distro compilers that enable stack protector by default. stackp-flag := $(call cc-option, -fno-stack-protector) endif endif +# Find arch-specific stack protector compiler sanity-checking script. 
+ifdef CONFIG_CC_STACKPROTECTOR + stackp-path := $(srctree)/scripts/gcc-$(ARCH)_$(BITS)-has-stack-protector.sh + ifneq ($(wildcard $(stackp-path)),) + stackp-check := $(stackp-path) + endif +endif KBUILD_CFLAGS += $(stackp-flag) ifdef CONFIG_KCOV @@ -1015,8 +1005,10 @@ ifneq ($(KBUILD_SRC),) fi; endif -# prepare2 creates a makefile if using a separate output directory -prepare2: prepare3 outputmakefile asm-generic +# prepare2 creates a makefile if using a separate output directory. +# From this point forward, .config has been reprocessed, so any rules +# that need to depend on updated CONFIG_* values can be checked here. +prepare2: prepare3 prepare-compiler-check outputmakefile asm-generic prepare1: prepare2 $(version_h) include/generated/utsrelease.h \ include/config/auto.conf @@ -1038,7 +1030,7 @@ ifdef CONFIG_STACK_VALIDATION ifeq ($(has_libelf),1) objtool_target := tools/objtool FORCE else - $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev or elfutils-libelf-devel") + $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel") SKIP_STACK_VALIDATION := 1 export SKIP_STACK_VALIDATION endif @@ -1047,6 +1039,32 @@ endif PHONY += prepare-objtool prepare-objtool: $(objtool_target) +# Check for CONFIG flags that require compiler support. Abort the build +# after .config has been processed, but before the kernel build starts. +# +# For security-sensitive CONFIG options, we don't want to fallback and/or +# silently change which compiler flags will be used, since that leads to +# producing kernels with different security feature characteristics +# depending on the compiler used. (For example, "But I selected +# CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!") +PHONY += prepare-compiler-check +prepare-compiler-check: FORCE +# Make sure compiler supports requested stack protector flag. +ifdef stackp-name + ifeq ($(call cc-option, $(stackp-flag)),) + @echo Cannot use CONFIG_CC_STACKPROTECTOR_$(stackp-name): \ + $(stackp-flag) not supported by compiler >&2 && exit 1 + endif +endif +# Make sure compiler does not have buggy stack-protector support. 
+ifdef stackp-check + ifneq ($(shell $(CONFIG_SHELL) $(stackp-check) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y) + @echo Cannot use CONFIG_CC_STACKPROTECTOR_$(stackp-name): \ + $(stackp-flag) available but compiler is broken >&2 && exit 1 + endif +endif + @: + # Generate some files # --------------------------------------------------------------------------- @@ -1364,6 +1382,8 @@ help: @$(MAKE) $(build)=$(package-dir) help @echo '' @echo 'Documentation targets:' + @$(MAKE) -f $(srctree)/Documentation/Makefile.sphinx dochelp + @echo '' @$(MAKE) -f $(srctree)/Documentation/DocBook/Makefile dochelp @echo '' @echo 'Architecture specific targets ($(SRCARCH)):' @@ -1412,8 +1432,11 @@ $(help-board-dirs): help-%: # Documentation targets # --------------------------------------------------------------------------- -%docs: scripts_basic FORCE +DOC_TARGETS := xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs epubdocs cleandocs cleanmediadocs +PHONY += $(DOC_TARGETS) +$(DOC_TARGETS): scripts_basic FORCE $(Q)$(MAKE) $(build)=scripts build_docproc build_check-lc_ctype + $(Q)$(MAKE) $(build)=Documentation -f $(srctree)/Documentation/Makefile.sphinx $@ $(Q)$(MAKE) $(build)=Documentation/DocBook $@ else # KBUILD_EXTMOD diff --git a/arch/Kconfig b/arch/Kconfig index e9734796531f..15996290fed4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -226,8 +226,8 @@ config ARCH_INIT_TASK config ARCH_TASK_STRUCT_ALLOCATOR bool -# Select if arch has its private alloc_thread_info() function -config ARCH_THREAD_INFO_ALLOCATOR +# Select if arch has its private alloc_thread_stack() function +config ARCH_THREAD_STACK_ALLOCATOR bool # Select if arch wants to size task_struct dynamically via arch_task_struct_size: diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 572b228c44c7..498933a7df97 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -46,10 +46,9 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ } \ #define ATOMIC_OP_RETURN(op, asm_op) \ -static inline int atomic_##op##_return(int i, atomic_t *v) \ +static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldl_l %0,%1\n" \ " " #asm_op " %0,%3,%2\n" \ @@ -61,7 +60,23 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ + return result; \ +} + +#define ATOMIC_FETCH_OP(op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + long temp, result; \ + __asm__ __volatile__( \ + "1: ldl_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stl_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ return result; \ } @@ -82,10 +97,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } \ #define ATOMIC64_OP_RETURN(op, asm_op) \ -static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldq_l %0,%1\n" \ " " #asm_op " %0,%3,%2\n" \ @@ -97,34 +111,77 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ + return result; \ 
+} + +#define ATOMIC64_FETCH_OP(op, asm_op) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ +{ \ + long temp, result; \ + __asm__ __volatile__( \ + "1: ldq_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stq_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ return result; \ } #define ATOMIC_OPS(op) \ ATOMIC_OP(op, op##l) \ ATOMIC_OP_RETURN(op, op##l) \ + ATOMIC_FETCH_OP(op, op##l) \ ATOMIC64_OP(op, op##q) \ - ATOMIC64_OP_RETURN(op, op##q) + ATOMIC64_OP_RETURN(op, op##q) \ + ATOMIC64_FETCH_OP(op, op##q) ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, bis) -ATOMIC_OP(xor, xor) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, bis) -ATOMIC64_OP(xor, xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, asm) \ + ATOMIC_OP(op, asm) \ + ATOMIC_FETCH_OP(op, asm) \ + ATOMIC64_OP(op, asm) \ + ATOMIC64_FETCH_OP(op, asm) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, bis) +ATOMIC_OPS(xor, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index aab14a019c20..c2ebb6f36c9d 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -40,7 +40,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return ret; } @@ -53,7 +53,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index 0131a7058778..77873d0ad293 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -25,8 +25,8 @@ static inline void __down_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_READ_BIAS; + oldcount = 
sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -52,13 +52,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) { long old, new, res; - res = sem->count; + res = atomic_long_read(&sem->count); do { new = res + RWSEM_ACTIVE_READ_BIAS; if (new <= 0) break; old = res; - res = cmpxchg(&sem->count, old, new); + res = atomic_long_cmpxchg(&sem->count, old, new); } while (res != old); return res >= 0 ? 1 : 0; } @@ -67,8 +67,8 @@ static inline long ___down_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_WRITE_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS; #else long temp; __asm__ __volatile__( @@ -106,7 +106,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) */ static inline int __down_write_trylock(struct rw_semaphore *sem) { - long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); if (ret == RWSEM_UNLOCKED_VALUE) return 1; @@ -117,8 +117,8 @@ static inline void __up_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -142,8 +142,8 @@ static inline void __up_write(struct rw_semaphore *sem) { long count; #ifndef CONFIG_SMP - sem->count -= RWSEM_ACTIVE_WRITE_BIAS; - count = sem->count; + sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS; + count = sem->count.counter; #else long temp; __asm__ __volatile__( @@ -171,8 +171,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_WAITING_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_WAITING_BIAS; #else long temp; __asm__ __volatile__( @@ -191,47 +191,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; -#else - long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%2,%0\n" - " stq_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (sem->count) - :"Ir" (val), "m" (sem->count)); -#endif -} - -static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; - return sem->count; -#else - long ret, temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%3,%2\n" - " addq %0,%3,%0\n" - " stq_c %2,%1\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (ret), "=m" (sem->count), "=&r" (temp) - :"Ir" (val), "m" (sem->count)); - - return ret; -#endif -} - #endif /* __KERNEL__ */ #endif /* _ALPHA_RWSEM_H */ diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index fed9c6f44c19..a40b9fc0c6c3 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -3,6 +3,8 @@ #include #include +#include +#include /* * Simple spin lock operations. 
There are two variants, one clears IRQ's @@ -13,8 +15,11 @@ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define arch_spin_is_locked(x) ((x)->lock != 0) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while ((x)->lock) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline int arch_spin_value_unlocked(arch_spinlock_t lock) { diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 4a905bd667e2..83e9eee57a55 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -147,7 +147,7 @@ retry: /* If for any reason at all we couldn't handle the fault, make sure we exit gracefully rather than endlessly redo the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/arc/Makefile b/arch/arc/Makefile index d4df6be66d58..601ed173080b 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -66,8 +66,6 @@ endif endif -cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables - # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok ifeq ($(atleast_gcc48),y) cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2 @@ -76,9 +74,7 @@ endif ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE # Generic build system uses -O2, we want -O3 # Note: No need to add to cflags-y as that happens anyways -# -# Disable the false maybe-uninitialized warings gcc spits out at -O3 -ARCH_CFLAGS += -O3 $(call cc-disable-warning,maybe-uninitialized,) +ARCH_CFLAGS += -O3 endif # small data is default for elf32 tool-chain. If not usable, disable it diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index dd683995bc9d..4e3c1b6b0806 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -67,6 +67,33 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int val, orig; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %[orig], [%[ctr]] \n" \ + " " #asm_op " %[val], %[orig], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " \n" \ + : [val] "=&r" (val), \ + [orig] "=&r" (orig) \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return orig; \ +} + #else /* !CONFIG_ARC_HAS_LLSC */ #ifndef CONFIG_SMP @@ -129,25 +156,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned long orig; \ + \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(flags); \ + orig = v->counter; \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ + \ + return orig; \ +} + #endif /* !CONFIG_ARC_HAS_LLSC */ #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -ATOMIC_OP(and, &=, and) -ATOMIC_OP(andnot, &= ~, bic) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) 
\ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) -#undef SCOND_FAIL_RETRY_VAR_DEF -#undef SCOND_FAIL_RETRY_ASM -#undef SCOND_FAIL_RETRY_VARS +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) #else /* CONFIG_ARC_PLAT_EZNPS */ @@ -208,22 +254,51 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int temp = i; \ + \ + /* Explicit full memory barrier needed before/after */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + " mov r2, %0\n" \ + " mov r3, %1\n" \ + " .word %2\n" \ + " mov %0, r2" \ + : "+r"(temp) \ + : "r"(&v->counter), "i"(asm_op) \ + : "r2", "r3", "memory"); \ + \ + smp_mb(); \ + \ + return temp; \ +} + #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3) #define atomic_sub(i, v) atomic_add(-(i), (v)) #define atomic_sub_return(i, v) atomic_add_return(-(i), (v)) -ATOMIC_OP(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) #define atomic_andnot(mask, v) atomic_and(~(mask), (v)) -ATOMIC_OP(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) -ATOMIC_OP(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) +ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) +ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) #endif /* CONFIG_ARC_PLAT_EZNPS */ #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 86ed671286df..3749234b7419 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -95,7 +95,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, + pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, __get_order_pte()); return pte; @@ -107,7 +107,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) pgtable_t pte_pg; struct page *page; - pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); + pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte()); if (!pte_pg) return 0; memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index cded4a9b5438..233d5ffe6ec7 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -15,8 +15,11 @@ #define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} #ifdef CONFIG_ARC_HAS_LLSC diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index e0efff15a5ae..b9192a653b7e 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, * prelogue is setup (callee regs saved and then fp set and not other * way around */ - 
pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); + pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); return 0; #endif diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 4549ab255dd1..98f22d2eb563 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -116,19 +116,19 @@ static struct clocksource arc_counter_gfrc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init arc_cs_setup_gfrc(struct device_node *node) +static int __init arc_cs_setup_gfrc(struct device_node *node) { int exists = cpuinfo_arc700[0].extn.gfrc; int ret; if (WARN(!exists, "Global-64-bit-Ctr clocksource not detected")) - return; + return -ENXIO; ret = arc_get_timer_clk(node); if (ret) - return; + return ret; - clocksource_register_hz(&arc_counter_gfrc, arc_timer_freq); + return clocksource_register_hz(&arc_counter_gfrc, arc_timer_freq); } CLOCKSOURCE_OF_DECLARE(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc); @@ -172,25 +172,25 @@ static struct clocksource arc_counter_rtc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init arc_cs_setup_rtc(struct device_node *node) +static int __init arc_cs_setup_rtc(struct device_node *node) { int exists = cpuinfo_arc700[smp_processor_id()].extn.rtc; int ret; if (WARN(!exists, "Local-64-bit-Ctr clocksource not detected")) - return; + return -ENXIO; /* Local to CPU hence not usable in SMP */ if (WARN(IS_ENABLED(CONFIG_SMP), "Local-64-bit-Ctr not usable in SMP")) - return; + return -EINVAL; ret = arc_get_timer_clk(node); if (ret) - return; + return ret; write_aux_reg(AUX_RTC_CTRL, 1); - clocksource_register_hz(&arc_counter_rtc, arc_timer_freq); + return clocksource_register_hz(&arc_counter_rtc, arc_timer_freq); } CLOCKSOURCE_OF_DECLARE(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc); @@ -213,23 +213,23 @@ static struct clocksource arc_counter_timer1 = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init arc_cs_setup_timer1(struct device_node *node) +static int __init arc_cs_setup_timer1(struct device_node *node) { int ret; /* Local to CPU hence not usable in SMP */ if (IS_ENABLED(CONFIG_SMP)) - return; + return -EINVAL; ret = arc_get_timer_clk(node); if (ret) - return; + return ret; write_aux_reg(ARC_REG_TIMER1_LIMIT, ARC_TIMER_MAX); write_aux_reg(ARC_REG_TIMER1_CNT, 0); write_aux_reg(ARC_REG_TIMER1_CTRL, TIMER_CTRL_NH); - clocksource_register_hz(&arc_counter_timer1, arc_timer_freq); + return clocksource_register_hz(&arc_counter_timer1, arc_timer_freq); } /********** Clock Event Device *********/ @@ -324,20 +324,28 @@ static struct notifier_block arc_timer_cpu_nb = { /* * clockevent setup for boot CPU */ -static void __init arc_clockevent_setup(struct device_node *node) +static int __init arc_clockevent_setup(struct device_node *node) { struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device); int ret; - register_cpu_notifier(&arc_timer_cpu_nb); + ret = register_cpu_notifier(&arc_timer_cpu_nb); + if (ret) { + pr_err("Failed to register cpu notifier"); + return ret; + } arc_timer_irq = irq_of_parse_and_map(node, 0); - if (arc_timer_irq <= 0) - panic("clockevent: missing irq"); + if (arc_timer_irq <= 0) { + pr_err("clockevent: missing irq"); + return -EINVAL; + } ret = arc_get_timer_clk(node); - if (ret) - panic("clockevent: missing clk"); + if (ret) { + pr_err("clockevent: missing clk"); + return ret; + } evt->irq = arc_timer_irq; evt->cpumask = cpumask_of(smp_processor_id()); @@ -347,22 +355,29 @@ static void __init arc_clockevent_setup(struct device_node *node) /* Needs apriori 
irq_set_percpu_devid() done in intc map function */ ret = request_percpu_irq(arc_timer_irq, timer_irq_handler, "Timer0 (per-cpu-tick)", evt); - if (ret) - panic("clockevent: unable to request irq\n"); + if (ret) { + pr_err("clockevent: unable to request irq\n"); + return ret; + } enable_percpu_irq(arc_timer_irq, 0); + + return 0; } -static void __init arc_of_timer_init(struct device_node *np) +static int __init arc_of_timer_init(struct device_node *np) { static int init_count = 0; + int ret; if (!init_count) { init_count = 1; - arc_clockevent_setup(np); + ret = arc_clockevent_setup(np); } else { - arc_cs_setup_timer1(np); + ret = arc_cs_setup_timer1(np); } + + return ret; } CLOCKSOURCE_OF_DECLARE(arc_clkevt, "snps,arc-timer", arc_of_timer_init); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index af63f4a13e60..e94e5aa33985 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -137,7 +137,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); /* If Pagefault was interrupted by SIGKILL, exit page fault "early" */ if (unlikely(fatal_signal_pending(current))) { diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 90542db1220d..f0636ec94903 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -358,10 +358,10 @@ config ARCH_CLPS711X bool "Cirrus Logic CLPS711x/EP721x/EP731x-based" select ARCH_REQUIRE_GPIOLIB select AUTO_ZRELADDR - select CLKSRC_MMIO select COMMON_CLK select CPU_ARM720T select GENERIC_CLOCKEVENTS + select CLPS711X_TIMER select MFD_SYSCON select SOC_BUS help diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 52be48bbd2dd..7fa295155543 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -766,7 +766,6 @@ ale_entries = <1024>; bd_ram_size = <0x2000>; no_bd_ram = <0>; - rx_descs = <64>; mac_control = <0x20>; slaves = <2>; active_slave = <0>; @@ -789,7 +788,7 @@ status = "disabled"; davinci_mdio: mdio@4a101000 { - compatible = "ti,davinci_mdio"; + compatible = "ti,cpsw-mdio","ti,davinci_mdio"; #address-cells = <1>; #size-cells = <0>; ti,hwmods = "davinci_mdio"; diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index 12fcde4d4d2e..cd81ecf12731 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -626,7 +626,6 @@ ale_entries = <1024>; bd_ram_size = <0x2000>; no_bd_ram = <0>; - rx_descs = <64>; mac_control = <0x20>; slaves = <2>; active_slave = <0>; @@ -636,7 +635,7 @@ syscon = <&scm_conf>; davinci_mdio: mdio@4a101000 { - compatible = "ti,am4372-mdio","ti,davinci_mdio"; + compatible = "ti,am4372-mdio","ti,cpsw-mdio","ti,davinci_mdio"; reg = <0x4a101000 0x100>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi index 8450944b28e6..22f7a13e20b4 100644 --- a/arch/arm/boot/dts/armada-385-linksys.dtsi +++ b/arch/arm/boot/dts/armada-385-linksys.dtsi @@ -58,8 +58,8 @@ soc { ranges = ; + MBUS_ID(0x09, 0x19) 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000>; internal-regs { diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index def9e783b5c6..6a40ed7d0502 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -206,6 +206,11 @@ brcm,nand-has-wp; }; + rng: rng@33000 { + compatible = "brcm,bcm-nsp-rng"; + reg = <0x33000 0x14>; + }; + ccbtimer0: timer@34000 { compatible = 
"arm,sp804"; reg = <0x34000 0x1000>; @@ -266,6 +271,48 @@ <0x30028 0x04>, <0x3f408 0x04>; }; + + sata_phy: sata_phy@40100 { + compatible = "brcm,iproc-nsp-sata-phy"; + reg = <0x40100 0x340>; + reg-names = "phy"; + #address-cells = <1>; + #size-cells = <0>; + + sata_phy0: sata-phy@0 { + reg = <0>; + #phy-cells = <0>; + status = "disabled"; + }; + + sata_phy1: sata-phy@1 { + reg = <1>; + #phy-cells = <0>; + status = "disabled"; + }; + }; + + sata: ahci@41000 { + compatible = "brcm,bcm-nsp-ahci"; + reg-names = "ahci", "top-ctrl"; + reg = <0x41000 0x1000>, <0x40020 0x1c>; + interrupts = ; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + + sata0: sata-port@0 { + reg = <0>; + phys = <&sata_phy0>; + phy-names = "sata-phy"; + }; + + sata1: sata-port@1 { + reg = <1>; + phys = <&sata_phy1>; + phy-names = "sata-phy"; + }; + }; }; pcie0: pcie@18012000 { diff --git a/arch/arm/boot/dts/bcm958625k.dts b/arch/arm/boot/dts/bcm958625k.dts index e298450b49b2..2d8422632b2b 100644 --- a/arch/arm/boot/dts/bcm958625k.dts +++ b/arch/arm/boot/dts/bcm958625k.dts @@ -68,6 +68,18 @@ status = "okay"; }; +&sata_phy0 { + status = "okay"; +}; + +&sata_phy1 { + status = "okay"; +}; + +&sata { + status = "okay"; +}; + &nand { nandcs@0 { compatible = "brcm,nandcs"; diff --git a/arch/arm/boot/dts/dm814x.dtsi b/arch/arm/boot/dts/dm814x.dtsi index d4537dc61497..f23cae0c2179 100644 --- a/arch/arm/boot/dts/dm814x.dtsi +++ b/arch/arm/boot/dts/dm814x.dtsi @@ -509,7 +509,6 @@ ale_entries = <1024>; bd_ram_size = <0x2000>; no_bd_ram = <0>; - rx_descs = <64>; mac_control = <0x20>; slaves = <2>; active_slave = <0>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 3a8f3976f6f9..de559f6e4fee 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1628,7 +1628,6 @@ ale_entries = <1024>; bd_ram_size = <0x2000>; no_bd_ram = <0>; - rx_descs = <64>; mac_control = <0x20>; slaves = <2>; active_slave = <0>; @@ -1663,7 +1662,7 @@ status = "disabled"; davinci_mdio: mdio@48485000 { - compatible = "ti,davinci_mdio"; + compatible = "ti,cpsw-mdio","ti,davinci_mdio"; #address-cells = <1>; #size-cells = <0>; ti,hwmods = "davinci_mdio"; diff --git a/arch/arm/boot/dts/meson8-minix-neo-x8.dts b/arch/arm/boot/dts/meson8-minix-neo-x8.dts index 4f536bb1f002..8bceb8d343f6 100644 --- a/arch/arm/boot/dts/meson8-minix-neo-x8.dts +++ b/arch/arm/boot/dts/meson8-minix-neo-x8.dts @@ -80,6 +80,7 @@ pmic@32 { compatible = "ricoh,rn5t618"; reg = <0x32>; + system-power-controller; regulators { }; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 3b44ef3cff12..3ebee530f2b0 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -539,8 +539,9 @@ gmac: ethernet@ff290000 { compatible = "rockchip,rk3288-gmac"; reg = <0xff290000 0x10000>; - interrupts = ; - interrupt-names = "macirq"; + interrupts = , + ; + interrupt-names = "macirq", "eth_wake_irq"; rockchip,grf = <&grf>; clocks = <&cru SCLK_MAC>, <&cru SCLK_MAC_RX>, <&cru SCLK_MAC_TX>, diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 2827e7ab5ebc..5dd2734e67ba 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -232,7 +232,7 @@ }; usb1: ohci@00400000 { - compatible = "atmel,at91rm9200-ohci", "usb-ohci"; + compatible = "atmel,sama5d2-ohci", "usb-ohci"; reg = <0x00400000 0x100000>; interrupts = <41 IRQ_TYPE_LEVEL_HIGH 2>; clocks = <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi 
b/arch/arm/boot/dts/socfpga_arria10.dtsi index 17e81dc9213e..5820b70c95b3 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -621,6 +621,22 @@ compatible = "altr,socfpga-a10-ocram-ecc"; reg = <0xff8c3000 0x400>; }; + + emac0-rx-ecc@ff8c0800 { + compatible = "altr,socfpga-eth-mac-ecc"; + reg = <0xff8c0800 0x400>; + altr,ecc-parent = <&gmac0>; + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>, + <36 IRQ_TYPE_LEVEL_HIGH>; + }; + + emac0-tx-ecc@ff8c0c00 { + compatible = "altr,socfpga-eth-mac-ecc"; + reg = <0xff8c0c00 0x400>; + altr,ecc-parent = <&gmac0>; + interrupts = <5 IRQ_TYPE_LEVEL_HIGH>, + <37 IRQ_TYPE_LEVEL_HIGH>; + }; }; rst: rstmgr@ffd05000 { diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index a03e56fb5dbc..ca58eb279d55 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -65,8 +65,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>, <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>, + <&dram_gates 26>; status = "disabled"; }; @@ -74,8 +75,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_fe0-de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>, <&ahb_gates 46>, + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>, + <&ahb_gates 46>, <&dram_gates 25>, <&dram_gates 26>; status = "disabled"; }; @@ -84,9 +86,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_fe0-de_be0-lcd0"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>, - <&ahb_gates 46>, <&dram_gates 25>, - <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 44>, <&ahb_gates 46>, + <&dram_gates 25>, <&dram_gates 26>; status = "disabled"; }; @@ -94,8 +96,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_fe0-de_be0-lcd0-tve0"; - clocks = <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>, - <&ahb_gates 44>, <&ahb_gates 46>, + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>, + <&ahb_gates 36>, <&ahb_gates 44>, + <&ahb_gates 46>, <&dram_gates 5>, <&dram_gates 25>, <&dram_gates 26>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index bddd0de88af6..367f33012493 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -65,8 +65,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>; status = "disabled"; }; @@ -74,7 +74,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 44>; status = "disabled"; }; @@ -82,8 +83,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-tve0"; - clocks = <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>, - <&ahb_gates 44>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>, + <&ahb_gates 36>, <&ahb_gates 44>; status = "disabled"; }; }; diff --git 
a/arch/arm/boot/dts/sun5i-r8-chip.dts b/arch/arm/boot/dts/sun5i-r8-chip.dts index a8d8b4582397..f694482bdeb6 100644 --- a/arch/arm/boot/dts/sun5i-r8-chip.dts +++ b/arch/arm/boot/dts/sun5i-r8-chip.dts @@ -52,7 +52,7 @@ / { model = "NextThing C.H.I.P."; - compatible = "nextthing,chip", "allwinner,sun5i-r8"; + compatible = "nextthing,chip", "allwinner,sun5i-r8", "allwinner,sun5i-a13"; aliases { i2c0 = &i2c0; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index febdf4c72fb0..2c34bbbb9570 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -67,8 +67,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>, <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>, + <&dram_gates 26>; status = "disabled"; }; @@ -76,8 +77,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>, - <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 44>, <&dram_gates 26>; status = "disabled"; }; @@ -85,7 +86,7 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-tve0"; - clocks = <&pll5 1>, + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>, <&ahb_gates 44>, <&dram_gates 5>, <&dram_gates 26>; status = "disabled"; @@ -231,6 +232,7 @@ pll3x2: pll3x2_clk { #clock-cells = <0>; compatible = "fixed-factor-clock"; + clocks = <&pll3>; clock-div = <1>; clock-mult = <2>; clock-output-names = "pll3-2x"; @@ -272,6 +274,7 @@ pll7x2: pll7x2_clk { #clock-cells = <0>; compatible = "fixed-factor-clock"; + clocks = <&pll7>; clock-div = <1>; clock-mult = <2>; clock-output-names = "pll7-2x"; diff --git a/arch/arm/boot/dts/tegra30-beaver.dts b/arch/arm/boot/dts/tegra30-beaver.dts index 1eca3b28ac64..b6da15d823a6 100644 --- a/arch/arm/boot/dts/tegra30-beaver.dts +++ b/arch/arm/boot/dts/tegra30-beaver.dts @@ -1843,7 +1843,7 @@ ldo5_reg: ldo5 { regulator-name = "vddio_sdmmc,avdd_vdac"; - regulator-min-microvolt = <3300000>; + regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-always-on; }; @@ -1914,6 +1914,7 @@ sdhci@78000000 { status = "okay"; + vqmmc-supply = <&ldo5_reg>; cd-gpios = <&gpio TEGRA_GPIO(I, 5) GPIO_ACTIVE_LOW>; wp-gpios = <&gpio TEGRA_GPIO(T, 3) GPIO_ACTIVE_HIGH>; power-gpios = <&gpio TEGRA_GPIO(D, 7) GPIO_ACTIVE_HIGH>; diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts index 6c60b7f91104..5c1fcab4a6f7 100644 --- a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts +++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts @@ -85,187 +85,199 @@ reg = <1>; #address-cells = <1>; #size-cells = <0>; + + switch0: switch0@0 { + compatible = "marvell,mv88e6085"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + dsa,member = <0 0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "lan0"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + }; + + port@2 { + reg = <2>; + label = "lan2"; + }; + + switch0port5: port@5 { + reg = <5>; + label = "dsa"; + phy-mode = "rgmii-txid"; + link = <&switch1port6 + &switch2port9>; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + port@6 { + reg = <6>; + label = "cpu"; + ethernet = <&fec1>; + fixed-link { + speed = <100>; + full-duplex; + }; + 
}; + }; + }; }; mdio_mux_2: mdio@2 { reg = <2>; #address-cells = <1>; #size-cells = <0>; - }; - - mdio_mux_4: mdio@4 { - reg = <4>; - #address-cells = <1>; - #size-cells = <0>; - }; - - mdio_mux_8: mdio@8 { - reg = <8>; - #address-cells = <1>; - #size-cells = <0>; - }; - }; - - dsa { - compatible = "marvell,dsa"; - #address-cells = <2>; - #size-cells = <0>; - dsa,ethernet = <&fec1>; - dsa,mii-bus = <&mdio_mux_1>; - - /* 6352 - Primary - 7 ports */ - switch0: switch@0-0 { - #address-cells = <1>; - #size-cells = <0>; - reg = <0x00 0>; - eeprom-length = <512>; - port@0 { + switch1: switch1@0 { + compatible = "marvell,mv88e6085"; + #address-cells = <1>; + #size-cells = <0>; reg = <0>; - label = "lan0"; - }; - - port@1 { - reg = <1>; - label = "lan1"; - }; - - port@2 { - reg = <2>; - label = "lan2"; - }; - - switch0port5: port@5 { - reg = <5>; - label = "dsa"; - phy-mode = "rgmii-txid"; - link = <&switch1port6 - &switch2port9>; - - fixed-link { - speed = <1000>; - full-duplex; + dsa,member = <0 1>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "lan3"; + phy-handle = <&switch1phy0>; + }; + + port@1 { + reg = <1>; + label = "lan4"; + phy-handle = <&switch1phy1>; + }; + + port@2 { + reg = <2>; + label = "lan5"; + phy-handle = <&switch1phy2>; + }; + + switch1port5: port@5 { + reg = <5>; + label = "dsa"; + link = <&switch2port9>; + phy-mode = "rgmii-txid"; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + switch1port6: port@6 { + reg = <6>; + label = "dsa"; + phy-mode = "rgmii-txid"; + link = <&switch0port5>; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; }; - }; - - port@6 { - reg = <6>; - label = "cpu"; - - fixed-link { - speed = <100>; - full-duplex; + mdio { + #address-cells = <1>; + #size-cells = <0>; + switch1phy0: switch1phy0@0 { + reg = <0>; + }; + switch1phy1: switch1phy0@1 { + reg = <1>; + }; + switch1phy2: switch1phy0@2 { + reg = <2>; + }; }; }; - }; - /* 6352 - Secondary - 7 ports */ - switch1: switch@0-1 { + mdio_mux_4: mdio@4 { #address-cells = <1>; #size-cells = <0>; - reg = <0x00 1>; - eeprom-length = <512>; - mii-bus = <&mdio_mux_2>; + reg = <4>; - port@0 { + switch2: switch2@0 { + compatible = "marvell,mv88e6085"; + #address-cells = <1>; + #size-cells = <0>; reg = <0>; - label = "lan3"; - }; - - port@1 { - reg = <1>; - label = "lan4"; - }; - - port@2 { - reg = <2>; - label = "lan5"; - }; - - switch1port5: port@5 { - reg = <5>; - label = "dsa"; - link = <&switch2port9>; - phy-mode = "rgmii-txid"; - - fixed-link { - speed = <1000>; - full-duplex; - }; - }; - - switch1port6: port@6 { - reg = <6>; - label = "dsa"; - phy-mode = "rgmii-txid"; - link = <&switch0port5>; - - fixed-link { - speed = <1000>; - full-duplex; + dsa,member = <0 2>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "lan6"; + }; + + port@1 { + reg = <1>; + label = "lan7"; + }; + + port@2 { + reg = <2>; + label = "lan8"; + }; + + port@3 { + reg = <3>; + label = "optical3"; + fixed-link { + speed = <1000>; + full-duplex; + link-gpios = <&gpio6 2 + GPIO_ACTIVE_HIGH>; + }; + }; + + port@4 { + reg = <4>; + label = "optical4"; + fixed-link { + speed = <1000>; + full-duplex; + link-gpios = <&gpio6 3 + GPIO_ACTIVE_HIGH>; + }; + }; + + switch2port9: port@9 { + reg = <9>; + label = "dsa"; + phy-mode = "rgmii-txid"; + link = <&switch1port5 + &switch0port5>; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; }; }; }; - /* 6185 - 10 ports */ - switch2: switch@0-2 { + mdio_mux_8: mdio@8 { + reg = <8>; #address-cells 
= <1>; #size-cells = <0>; - reg = <0x00 2>; - mii-bus = <&mdio_mux_4>; - - port@0 { - reg = <0>; - label = "lan6"; - }; - - port@1 { - reg = <1>; - label = "lan7"; - }; - - port@2 { - reg = <2>; - label = "lan8"; - }; - - port@3 { - reg = <3>; - label = "optical3"; - - fixed-link { - speed = <1000>; - full-duplex; - link-gpios = <&gpio6 2 - GPIO_ACTIVE_HIGH>; - }; - }; - - port@4 { - reg = <4>; - label = "optical4"; - - fixed-link { - speed = <1000>; - full-duplex; - link-gpios = <&gpio6 3 - GPIO_ACTIVE_HIGH>; - }; - }; - - switch2port9: port@9 { - reg = <9>; - label = "dsa"; - phy-mode = "rgmii-txid"; - link = <&switch1port5 - &switch0port5>; - - fixed-link { - speed = <1000>; - full-duplex; - }; - }; }; }; diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c index 03a39fe29246..1568cb5cd870 100644 --- a/arch/arm/crypto/ghash-ce-glue.c +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -154,30 +154,23 @@ static int ghash_async_init(struct ahash_request *req) struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); - if (!may_use_simd()) { - memcpy(cryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); - return crypto_ahash_init(cryptd_req); - } else { - struct shash_desc *desc = cryptd_shash_desc(cryptd_req); - struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); - - desc->tfm = child; - desc->flags = req->base.flags; - return crypto_shash_init(desc); - } + desc->tfm = child; + desc->flags = req->base.flags; + return crypto_shash_init(desc); } static int ghash_async_update(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!may_use_simd()) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - + if (!may_use_simd() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_update(cryptd_req); @@ -190,12 +183,12 @@ static int ghash_async_update(struct ahash_request *req) static int ghash_async_final(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!may_use_simd()) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - + if (!may_use_simd() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_final(cryptd_req); @@ -212,7 +205,8 @@ static int ghash_async_digest(struct ahash_request *req) struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!may_use_simd()) { + if (!may_use_simd() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); 
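/*
 * The request is deferred to cryptd in two cases: when NEON/SIMD cannot
 * be used in the current context, and when we are in atomic context
 * while cryptd still has requests queued, so that this request cannot
 * overtake the ones already sitting in the cryptd queue and complete
 * out of order.
 */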
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_digest(cryptd_req); diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 9e10c4567eb4..66d0e215a773 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -77,8 +77,36 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + unsigned long tmp; \ + int result, val; \ + \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic_fetch_" #op "\n" \ +"1: ldrex %0, [%4]\n" \ +" " #asm_op " %1, %0, %5\n" \ +" strex %2, %1, [%4]\n" \ +" teq %2, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "Ir" (i) \ + : "cc"); \ + \ + return result; \ +} + #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new) { @@ -159,6 +187,20 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int val; \ + \ + raw_local_irq_save(flags); \ + val = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return val; \ +} + static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; @@ -187,19 +229,26 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -ATOMIC_OP(and, &=, and) -ATOMIC_OP(andnot, &= ~, bic) -ATOMIC_OP(or, |=, orr) -ATOMIC_OP(xor, ^=, eor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, orr) +ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -317,24 +366,61 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \ return result; \ } +#define ATOMIC64_FETCH_OP(op, op1, op2) \ +static inline long long \ +atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \ +{ \ + long long result, val; \ + unsigned long tmp; \ + \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic64_fetch_" #op "\n" \ +"1: ldrexd %0, %H0, [%4]\n" \ +" " #op1 " %Q1, %Q0, %Q5\n" \ +" " #op2 " %R1, %R0, %R5\n" \ +" strexd %2, %1, %H1, [%4]\n" \ +" teq %2, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "r" (i) \ + : "cc"); \ + \ + return result; \ +} + #define ATOMIC64_OPS(op, op1, op2) \ ATOMIC64_OP(op, op1, op2) \ - ATOMIC64_OP_RETURN(op, op1, op2) + ATOMIC64_OP_RETURN(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) ATOMIC64_OPS(add, 
adds, adc) ATOMIC64_OPS(sub, subs, sbc) #define atomic64_add_return_relaxed atomic64_add_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) #define atomic64_andnot atomic64_andnot -ATOMIC64_OP(and, and, and) -ATOMIC64_OP(andnot, bic, bic) -ATOMIC64_OP(or, orr, orr) -ATOMIC64_OP(xor, eor, eor) +ATOMIC64_OPS(and, and, and) +ATOMIC64_OPS(andnot, bic, bic) +ATOMIC64_OPS(or, orr, orr) +ATOMIC64_OPS(xor, eor, eor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index a708fa1f0905..766bf9b78160 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -28,10 +28,10 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_setup() efi_virtmap_load() #define arch_efi_call_virt_teardown() efi_virtmap_unload() -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ efi_##f##_t *__f; \ - __f = efi.systab->runtime->f; \ + __f = p->f; \ __f(args); \ }) diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 19cfab526d13..b2902a5cd780 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -29,7 +29,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + return (pmd_t *)get_zeroed_page(GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) @@ -57,7 +57,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) static inline void clean_pte_table(pte_t *pte) { diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 0fa418463f49..4bec45442072 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -6,6 +6,8 @@ #endif #include +#include +#include /* * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K @@ -50,8 +52,21 @@ static inline void dsb_sev(void) * memory. 
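 *
 * The arch_spin_unlock_wait() rewrite below follows from this: it spins
 * with wfe() until the ticket owner advances or the lock is observed
 * unlocked, then uses smp_acquire__after_ctrl_dep() to upgrade the final
 * control dependency to ACQUIRE ordering.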
*/ -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u16 owner = READ_ONCE(lock->tickets.owner); + + for (;;) { + arch_spinlock_t tmp = READ_ONCE(*lock); + + if (tmp.tickets.owner == tmp.tickets.next || + tmp.tickets.owner != owner) + break; + + wfe(); + } + smp_acquire__after_ctrl_dep(); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 3cadb726ec88..1e25cd80589e 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -209,17 +209,38 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) tlb_flush(tlb); } -static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { + if (tlb->nr == tlb->max) + return true; tlb->pages[tlb->nr++] = page; - VM_BUG_ON(tlb->nr > tlb->max); - return tlb->max - tlb->nr; + return false; } static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { - if (!__tlb_remove_page(tlb, page)) + if (__tlb_remove_page(tlb, page)) { tlb_flush_mmu(tlb); + __tlb_remove_page(tlb, page); + } +} + +static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return __tlb_remove_page(tlb, page); +} + +static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, + struct page *page) +{ + return __tlb_remove_page(tlb, page); +} + +static inline void tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return tlb_remove_page(tlb, page); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h index b6b962d70db9..9d874db13c0e 100644 --- a/arch/arm/include/asm/xen/hypercall.h +++ b/arch/arm/include/asm/xen/hypercall.h @@ -52,6 +52,7 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); +int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type); int HYPERVISOR_platform_op_raw(void *arg); static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) { diff --git a/arch/arm/include/asm/xen/xen-ops.h b/arch/arm/include/asm/xen/xen-ops.h new file mode 100644 index 000000000000..ec154e719b11 --- /dev/null +++ b/arch/arm/include/asm/xen/xen-ops.h @@ -0,0 +1,6 @@ +#ifndef _ASM_XEN_OPS_H +#define _ASM_XEN_OPS_H + +void xen_efi_runtime_setup(void); + +#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index df3f60cb1168..a2b3eb313a25 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -139,8 +139,8 @@ struct kvm_arch_memory_slot { #define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__) #define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1) -#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) -#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) +#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) +#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) /* Normal registers are mapped as coprocessor 16. 
*/ #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 7b5350060612..261dae6f3fec 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1064,6 +1064,7 @@ void __init setup_arch(char **cmdline_p) early_paging_init(mdesc); #endif setup_dma_zone(mdesc); + xen_early_init(); efi_init(); sanity_check_meminfo(); arm_memblock_init(mdesc); @@ -1080,7 +1081,6 @@ void __init setup_arch(char **cmdline_p) arm_dt_init_cpu_maps(); psci_dt_init(); - xen_early_init(); #ifdef CONFIG_SMP if (is_smp()) { if (!mdesc->smp_init || !mdesc->smp_init()) { diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 1bfa7a7f5533..b6ec65e68009 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -390,7 +390,7 @@ int __init twd_local_timer_register(struct twd_local_timer *tlt) } #ifdef CONFIG_OF -static void __init twd_local_timer_of_register(struct device_node *np) +static int __init twd_local_timer_of_register(struct device_node *np) { int err; @@ -410,6 +410,7 @@ static void __init twd_local_timer_of_register(struct device_node *np) out: WARN(err, "twd_local_timer_of_register failed (%d)\n", err); + return err; } CLOCKSOURCE_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); CLOCKSOURCE_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 893941ec98dc..f1bde7c4e736 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -263,6 +263,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_timer_vcpu_terminate(vcpu); kvm_vgic_vcpu_destroy(vcpu); kvm_pmu_vcpu_destroy(vcpu); + kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index 68ab6412392a..4f1709b31822 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -89,6 +89,7 @@ config ARCH_BCM_MOBILE select HAVE_ARM_ARCH_TIMER select PINCTRL select ARCH_BCM_MOBILE_SMP if SMP + select BCM_KONA_TIMER help This enables support for systems based on Broadcom mobile SoCs. 
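All of the timer init routines converted in this merge (the ARC clocksources and clockevent above, smp_twd, and the drivers now selected per platform) follow the same new convention: a CLOCKSOURCE_OF_DECLARE'd init function returns an int errno instead of void, reporting failure rather than panicking. A minimal sketch of the shape such a routine now takes (hypothetical "acme" driver, all names invented for illustration):

#include <linux/clocksource.h>
#include <linux/errno.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/printk.h>

static int __init acme_timer_init(struct device_node *np)
{
	void __iomem *base;

	base = of_iomap(np, 0);
	if (!base) {
		pr_err("acme-timer: failed to map registers\n");
		return -ENXIO;
	}

	/* program the hardware and register the clocksource here */

	return 0;	/* or a negative errno on any failure */
}
CLOCKSOURCE_OF_DECLARE(acme_timer, "acme,timer", acme_timer_init);

Returning the error instead of calling panic() lets the core code decide how fatal a missing clock or irq really is.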
@@ -143,6 +144,7 @@ config ARCH_BCM2835 select ARM_TIMER_SP804 select HAVE_ARM_ARCH_TIMER if ARCH_MULTI_V7 select CLKSRC_OF + select BCM2835_TIMER select PINCTRL select PINCTRL_BCM2835 help diff --git a/arch/arm/mach-integrator/Kconfig b/arch/arm/mach-integrator/Kconfig index b2a85ba13f08..291262e5aeaf 100644 --- a/arch/arm/mach-integrator/Kconfig +++ b/arch/arm/mach-integrator/Kconfig @@ -20,7 +20,7 @@ if ARCH_INTEGRATOR config ARCH_INTEGRATOR_AP bool "Support Integrator/AP and Integrator/PP2 platforms" - select CLKSRC_MMIO + select INTEGRATOR_AP_TIMER select MIGHT_HAVE_PCI select SERIAL_AMBA_PL010 if TTY select SERIAL_AMBA_PL010_CONSOLE if TTY diff --git a/arch/arm/mach-keystone/Kconfig b/arch/arm/mach-keystone/Kconfig index ea955f6db8b7..bac577badc7e 100644 --- a/arch/arm/mach-keystone/Kconfig +++ b/arch/arm/mach-keystone/Kconfig @@ -4,7 +4,7 @@ config ARCH_KEYSTONE depends on ARM_PATCH_PHYS_VIRT select ARM_GIC select HAVE_ARM_ARCH_TIMER - select CLKSRC_MMIO + select KEYSTONE_TIMER select ARM_ERRATA_798181 if SMP select COMMON_CLK_KEYSTONE select ARCH_SUPPORTS_BIG_ENDIAN diff --git a/arch/arm/mach-moxart/Kconfig b/arch/arm/mach-moxart/Kconfig index 180d9d216719..ddc79cea32d3 100644 --- a/arch/arm/mach-moxart/Kconfig +++ b/arch/arm/mach-moxart/Kconfig @@ -3,7 +3,7 @@ menuconfig ARCH_MOXART depends on ARCH_MULTI_V4 select CPU_FA526 select ARM_DMA_MEM_BUFFERABLE - select CLKSRC_MMIO + select MOXART_TIMER select GENERIC_IRQ_CHIP select ARCH_REQUIRE_GPIOLIB select PHYLIB if NETDEVICES diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile index ecf9e0c3b107..e53c6cfcab51 100644 --- a/arch/arm/mach-mvebu/Makefile +++ b/arch/arm/mach-mvebu/Makefile @@ -7,9 +7,15 @@ CFLAGS_pmsu.o := -march=armv7-a obj-$(CONFIG_MACH_MVEBU_ANY) += system-controller.o mvebu-soc-id.o ifeq ($(CONFIG_MACH_MVEBU_V7),y) -obj-y += cpu-reset.o board-v7.o coherency.o coherency_ll.o pmsu.o pmsu_ll.o pm.o pm-board.o +obj-y += cpu-reset.o board-v7.o coherency.o coherency_ll.o pmsu.o pmsu_ll.o + +obj-$(CONFIG_PM) += pm.o pm-board.o obj-$(CONFIG_SMP) += platsmp.o headsmp.o platsmp-a9.o headsmp-a9.o endif obj-$(CONFIG_MACH_DOVE) += dove.o -obj-$(CONFIG_MACH_KIRKWOOD) += kirkwood.o kirkwood-pm.o + +ifeq ($(CONFIG_MACH_KIRKWOOD),y) +obj-y += kirkwood.o +obj-$(CONFIG_PM) += kirkwood-pm.o +endif diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 7e989d61159c..e80f0dde2189 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -162,22 +162,16 @@ exit: } /* - * This ioremap hook is used on Armada 375/38x to ensure that PCIe - * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This - * is needed as a workaround for a deadlock issue between the PCIe - * interface and the cache controller. + * This ioremap hook is used on Armada 375/38x to ensure that all MMIO + * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is + * needed for the HW I/O coherency mechanism to work properly without + * deadlock. 
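 * (The hook now forces MT_UNCACHED unconditionally, and the
 * pci_ioremap_set_mem_type(MT_UNCACHED) call added below extends the
 * same rule to PCI bus mappings, which is why the old PCIe-aperture
 * range check can be dropped.)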
*/ static void __iomem * -armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size, - unsigned int mtype, void *caller) +armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) { - struct resource pcie_mem; - - mvebu_mbus_get_pcie_mem_aperture(&pcie_mem); - - if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end) - mtype = MT_UNCACHED; - + mtype = MT_UNCACHED; return __arm_ioremap_caller(phys_addr, size, mtype, caller); } @@ -186,7 +180,8 @@ static void __init armada_375_380_coherency_init(struct device_node *np) struct device_node *cache_dn; coherency_cpu_base = of_iomap(np, 0); - arch_ioremap_caller = armada_pcie_wa_ioremap_caller; + arch_ioremap_caller = armada_wa_ioremap_caller; + pci_ioremap_set_mem_type(MT_UNCACHED); /* * We should switch the PL310 to I/O coherency mode only if diff --git a/arch/arm/mach-mxs/Kconfig b/arch/arm/mach-mxs/Kconfig index 84794137b175..68a3a9ec605d 100644 --- a/arch/arm/mach-mxs/Kconfig +++ b/arch/arm/mach-mxs/Kconfig @@ -16,7 +16,7 @@ config ARCH_MXS bool "Freescale MXS (i.MX23, i.MX28) support" depends on ARCH_MULTI_V5 select ARCH_REQUIRE_GPIOLIB - select CLKSRC_MMIO + select MXS_TIMER select PINCTRL select SOC_BUS select SOC_IMX23 diff --git a/arch/arm/mach-nspire/Kconfig b/arch/arm/mach-nspire/Kconfig index bc41f26c1a12..d4985305cab2 100644 --- a/arch/arm/mach-nspire/Kconfig +++ b/arch/arm/mach-nspire/Kconfig @@ -7,5 +7,6 @@ config ARCH_NSPIRE select ARM_AMBA select ARM_VIC select ARM_TIMER_SP804 + select NSPIRE_TIMER help This enables support for systems using the TI-NSPIRE CPU diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig index 0cf4426183cf..9e938f2961cf 100644 --- a/arch/arm/mach-prima2/Kconfig +++ b/arch/arm/mach-prima2/Kconfig @@ -28,6 +28,7 @@ config ARCH_ATLAS7 default y select ARM_GIC select CPU_V7 + select ATLAS7_TIMER select HAVE_ARM_SCU if SMP select HAVE_SMP help @@ -38,6 +39,7 @@ config ARCH_PRIMA2 default y select SIRF_IRQ select ZONE_DMA + select PRIMA2_TIMER help Support for CSR SiRFSoC ARM Cortex A9 Platform diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig index 301a98498453..4fdc3425ffbd 100644 --- a/arch/arm/mach-u300/Kconfig +++ b/arch/arm/mach-u300/Kconfig @@ -4,7 +4,7 @@ menuconfig ARCH_U300 select ARCH_REQUIRE_GPIOLIB select ARM_AMBA select ARM_VIC - select CLKSRC_MMIO + select U300_TIMER select CPU_ARM926T select HAVE_TCM select PINCTRL diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index ad5841856007..3a2e678b8d30 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -243,7 +243,7 @@ good_area: goto out; } - return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); + return handle_mm_fault(vma, addr & PAGE_MASK, flags); check_stack: /* Don't allow expansion below FIRST_USER_ADDRESS */ diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index b8d477321730..c1c1a5c67da1 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -23,7 +23,7 @@ #define __pgd_alloc() kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL) #define __pgd_free(pgd) kfree(pgd) #else -#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, 2) +#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL, 2) #define __pgd_free(pgd) free_pages((unsigned long)pgd, 2) #endif diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile index 12969523414c..227952103b0b 100644 --- a/arch/arm/xen/Makefile +++ b/arch/arm/xen/Makefile @@ -1 +1,2 @@ obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o +obj-$(CONFIG_XEN_EFI) += 
efi.o diff --git a/arch/arm/xen/efi.c b/arch/arm/xen/efi.c new file mode 100644 index 000000000000..16db419f9e90 --- /dev/null +++ b/arch/arm/xen/efi.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015, Linaro Limited, Shannon Zhao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + */ + +#include +#include +#include + +/* Set XEN EFI runtime services function pointers. Other fields of struct efi, + * e.g. efi.systab, will be set like normal EFI. + */ +void __init xen_efi_runtime_setup(void) +{ + efi.get_time = xen_efi_get_time; + efi.set_time = xen_efi_set_time; + efi.get_wakeup_time = xen_efi_get_wakeup_time; + efi.set_wakeup_time = xen_efi_set_wakeup_time; + efi.get_variable = xen_efi_get_variable; + efi.get_next_variable = xen_efi_get_next_variable; + efi.set_variable = xen_efi_set_variable; + efi.query_variable_info = xen_efi_query_variable_info; + efi.update_capsule = xen_efi_update_capsule; + efi.query_capsule_caps = xen_efi_query_capsule_caps; + efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; + efi.reset_system = NULL; /* Functionality provided by Xen. */ +} +EXPORT_SYMBOL_GPL(xen_efi_runtime_setup); diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 75cd7345c654..0bea3d271f6e 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -12,14 +12,16 @@ #include #include #include -#include #include #include +#include #include +#include #include #include #include #include +#include #include #include #include @@ -30,6 +32,7 @@ #include #include #include +#include #include @@ -46,14 +49,16 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); static struct vcpu_info __percpu *xen_vcpu_info; +/* Linux <-> Xen vCPU id mapping */ +DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); + /* These are unused until we support booting "pre-ballooned" */ unsigned long xen_released_pages; struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; static __read_mostly unsigned int xen_events_irq; -static __initdata struct device_node *xen_node; - int xen_remap_domain_gfn_array(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *gfn, int nr, @@ -84,19 +89,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); -static unsigned long long xen_stolen_accounting(int cpu) -{ - struct vcpu_runstate_info state; - - BUG_ON(cpu != smp_processor_id()); - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; -} - static void xen_read_wallclock(struct timespec64 *ts) { u32 version; @@ -179,10 +171,14 @@ static void xen_percpu_init(void) pr_info("Xen: initializing cpu%d\n", cpu); vcpup = per_cpu_ptr(xen_vcpu_info, cpu); + /* Direct vCPU id mapping for ARM guests. 
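Unlike x86, where the Xen vCPU id may differ from the Linux CPU number, the mapping here is simply the identity; callers reach it through xen_vcpu_nr(cpu), which reads this per-CPU variable.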
*/ + per_cpu(xen_vcpu_id, cpu) = cpu; + info.mfn = virt_to_gfn(vcpup); info.offset = xen_offset_in_page(vcpup); - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), + &info); BUG_ON(err); per_cpu(xen_vcpu, cpu) = vcpup; @@ -237,6 +233,46 @@ static irqreturn_t xen_arm_callback(int irq, void *arg) return IRQ_HANDLED; } +static __initdata struct { + const char *compat; + const char *prefix; + const char *version; + bool found; +} hyper_node = {"xen,xen", "xen,xen-", NULL, false}; + +static int __init fdt_find_hyper_node(unsigned long node, const char *uname, + int depth, void *data) +{ + const void *s = NULL; + int len; + + if (depth != 1 || strcmp(uname, "hypervisor") != 0) + return 0; + + if (of_flat_dt_is_compatible(node, hyper_node.compat)) + hyper_node.found = true; + + s = of_get_flat_dt_prop(node, "compatible", &len); + if (strlen(hyper_node.prefix) + 3 < len && + !strncmp(hyper_node.prefix, s, strlen(hyper_node.prefix))) + hyper_node.version = s + strlen(hyper_node.prefix); + + /* + * Check if Xen supports EFI by checking whether there is the + * "/hypervisor/uefi" node in DT. If so, runtime services are available + * through proxy functions (e.g. in case of Xen dom0 EFI implementation + * they call special hypercall which executes relevant EFI functions) + * and that is why they are always enabled. + */ + if (IS_ENABLED(CONFIG_XEN_EFI)) { + if ((of_get_flat_dt_subnode_by_name(node, "uefi") > 0) && + !efi_runtime_disabled()) + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + } + + return 0; +} + /* * see Documentation/devicetree/bindings/arm/xen.txt for the * documentation of the Xen Device Tree format. @@ -244,26 +280,18 @@ static irqreturn_t xen_arm_callback(int irq, void *arg) #define GRANT_TABLE_PHYSADDR 0 void __init xen_early_init(void) { - int len; - const char *s = NULL; - const char *version = NULL; - const char *xen_prefix = "xen,xen-"; - - xen_node = of_find_compatible_node(NULL, NULL, "xen,xen"); - if (!xen_node) { + of_scan_flat_dt(fdt_find_hyper_node, NULL); + if (!hyper_node.found) { pr_debug("No Xen support\n"); return; } - s = of_get_property(xen_node, "compatible", &len); - if (strlen(xen_prefix) + 3 < len && - !strncmp(xen_prefix, s, strlen(xen_prefix))) - version = s + strlen(xen_prefix); - if (version == NULL) { + + if (hyper_node.version == NULL) { pr_debug("Xen version not found\n"); return; } - pr_info("Xen %s support found\n", version); + pr_info("Xen %s support found\n", hyper_node.version); xen_domain_type = XEN_HVM_DOMAIN; @@ -278,28 +306,68 @@ void __init xen_early_init(void) add_preferred_console("hvc", 0, NULL); } +static void __init xen_acpi_guest_init(void) +{ +#ifdef CONFIG_ACPI + struct xen_hvm_param a; + int interrupt, trigger, polarity; + + a.domid = DOMID_SELF; + a.index = HVM_PARAM_CALLBACK_IRQ; + + if (HYPERVISOR_hvm_op(HVMOP_get_param, &a) + || (a.value >> 56) != HVM_PARAM_CALLBACK_TYPE_PPI) { + xen_events_irq = 0; + return; + } + + interrupt = a.value & 0xff; + trigger = ((a.value >> 8) & 0x1) ? ACPI_EDGE_SENSITIVE + : ACPI_LEVEL_SENSITIVE; + polarity = ((a.value >> 8) & 0x2) ? 
ACPI_ACTIVE_LOW + : ACPI_ACTIVE_HIGH; + xen_events_irq = acpi_register_gsi(NULL, interrupt, trigger, polarity); +#endif +} + +static void __init xen_dt_guest_init(void) +{ + struct device_node *xen_node; + + xen_node = of_find_compatible_node(NULL, NULL, "xen,xen"); + if (!xen_node) { + pr_err("Xen support was detected before, but it has disappeared\n"); + return; + } + + xen_events_irq = irq_of_parse_and_map(xen_node, 0); +} + static int __init xen_guest_init(void) { struct xen_add_to_physmap xatp; struct shared_info *shared_info_page = NULL; - struct resource res; - phys_addr_t grant_frames; if (!xen_domain()) return 0; - if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) { - pr_err("Xen grant table base address not found\n"); - return -ENODEV; - } - grant_frames = res.start; + if (!acpi_disabled) + xen_acpi_guest_init(); + else + xen_dt_guest_init(); - xen_events_irq = irq_of_parse_and_map(xen_node, 0); if (!xen_events_irq) { pr_err("Xen event channel interrupt not found\n"); return -ENODEV; } + /* + * The fdt parsing codes have set EFI_RUNTIME_SERVICES if Xen EFI + * parameters are found. Force enable runtime services. + */ + if (efi_enabled(EFI_RUNTIME_SERVICES)) + xen_efi_runtime_setup(); + shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL); if (!shared_info_page) { @@ -328,7 +396,13 @@ static int __init xen_guest_init(void) if (xen_vcpu_info == NULL) return -ENOMEM; - if (gnttab_setup_auto_xlat_frames(grant_frames)) { + /* Direct vCPU id mapping for ARM guests. */ + per_cpu(xen_vcpu_id, 0) = 0; + + xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); + if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, + &xen_auto_xlat_grant_frames.vaddr, + xen_auto_xlat_grant_frames.count)) { free_percpu(xen_vcpu_info); return -ENOMEM; } @@ -355,8 +429,8 @@ static int __init xen_guest_init(void) register_cpu_notifier(&xen_cpu_notifier); - pv_time_ops.steal_clock = xen_stolen_accounting; - static_key_slow_inc(¶virt_steal_enabled); + xen_time_setup_guest(); + if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); @@ -403,4 +477,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); +EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); EXPORT_SYMBOL_GPL(privcmd_call); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index 9a36f4f49c10..a648dfc3be30 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -91,6 +91,7 @@ HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); +HYPERCALL2(vm_assist); ENTRY(privcmd_call) stmdb sp!, {r4} diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5a0a691d4220..9f8b99e20557 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -4,9 +4,11 @@ config ARM64 select ACPI_GENERIC_GSI if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_KCOV select ARCH_HAS_SG_CHAIN select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF @@ -85,8 +87,11 @@ config ARM64 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS + select HAVE_KPROBES + select HAVE_KRETPROBES 
if HAVE_KPROBES select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING @@ -664,6 +669,16 @@ config PARAVIRT_TIME_ACCOUNTING If in doubt, say N here. +config KEXEC + depends on PM_SLEEP_SMP + select KEXEC_CORE + bool "kexec system call" + ---help--- + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + config XEN_DOM0 def_bool y depends on XEN @@ -872,7 +887,7 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" - select ARM64_MODULE_PLTS + select ARM64_MODULE_PLTS if MODULES select RELOCATABLE help Randomizes the virtual address at which the kernel image is diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7085e322dc42..d59b6908a21a 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -12,7 +12,6 @@ LDFLAGS_vmlinux :=-p --no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) -OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 ifneq ($(CONFIG_RELOCATABLE),) @@ -95,7 +94,7 @@ boot := arch/arm64/boot Image: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -Image.%: vmlinux +Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ zinstall install: @@ -121,6 +120,16 @@ archclean: $(Q)$(MAKE) $(clean)=$(boot) $(Q)$(MAKE) $(clean)=$(boot)/dts +# We need to generate vdso-offsets.h before compiling certain files in kernel/. +# In order to do that, we should use the archprepare target, but we can't since +# asm-offsets.h is included in some files used to generate vdso-offsets.h, and +# asm-offsets.h is built in prepare0, for which archprepare is a dependency. +# Therefore we need to generate the header after prepare0 has been made, hence +# this hack. +prepare: vdso_prepare +vdso_prepare: prepare0 + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h + define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 305c552b5ec1..1f012c506434 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -14,6 +14,8 @@ # Based on the ia64 boot/Makefile. 
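An aside on the CONFIG_KEXEC entry added above: from userspace, the flow this option enables is a kexec_load(2) call to stage the new kernel's segments, then reboot(2) with the KEXEC command to jump into them. A hypothetical minimal sketch in C; a real loader such as kexec-tools derives the entry point and segment contents from the Image file, both of which are elided here, so this stub fails cleanly at the syscall:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/reboot.h>
#include <linux/reboot.h>
#include <linux/kexec.h>

int main(void)
{
	/* A real loader fills buf/bufsz/mem/memsz from the Image file. */
	struct kexec_segment seg = { 0 };

	if (syscall(SYS_kexec_load, 0UL /* entry */, 1UL, &seg,
		    KEXEC_ARCH_DEFAULT) != 0) {
		perror("kexec_load");	/* fails as-is: the segment is empty */
		return 1;
	}

	/* Jump into the staged kernel; does not return on success. */
	return reboot(LINUX_REBOOT_CMD_KEXEC);
}
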
# +OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S + targets := Image Image.gz $(obj)/Image: vmlinux FORCE diff --git a/arch/arm64/boot/dts/apm/apm-merlin.dts b/arch/arm64/boot/dts/apm/apm-merlin.dts index 387c6a8d0da9..b0f64414c1b0 100644 --- a/arch/arm64/boot/dts/apm/apm-merlin.dts +++ b/arch/arm64/boot/dts/apm/apm-merlin.dts @@ -83,3 +83,9 @@ status = "ok"; }; }; + +&mdio { + sgenet0phy: phy@0 { + reg = <0x0>; + }; +}; diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts index 44db32ec5e9c..b7fb5d9295c2 100644 --- a/arch/arm64/boot/dts/apm/apm-mustang.dts +++ b/arch/arm64/boot/dts/apm/apm-mustang.dts @@ -79,3 +79,15 @@ &mmc0 { status = "ok"; }; + +&mdio { + menet0phy: phy@3 { + reg = <0x3>; + }; + sgenet0phy: phy@4 { + reg = <0x4>; + }; + sgenet1phy: phy@5 { + reg = <0x5>; + }; +}; diff --git a/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi b/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi index c569f761d090..2e1e5daa1dc7 100644 --- a/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi +++ b/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi @@ -625,10 +625,18 @@ apm,irq-start = <8>; }; + mdio: mdio@1f610000 { + compatible = "apm,xgene-mdio-xfi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x1f610000 0x0 0xd100>; + clocks = <&xge0clk 0>; + }; + sgenet0: ethernet@1f610000 { compatible = "apm,xgene2-sgenet"; status = "disabled"; - reg = <0x0 0x1f610000 0x0 0x10000>, + reg = <0x0 0x1f610000 0x0 0xd100>, <0x0 0x1f600000 0x0 0Xd100>, <0x0 0x20000000 0x0 0X20000>; interrupts = <0 96 4>, @@ -637,6 +645,7 @@ clocks = <&xge0clk 0>; local-mac-address = [00 01 73 00 00 01]; phy-connection-type = "sgmii"; + phy-handle = <&sgenet0phy>; }; xgenet1: ethernet@1f620000 { diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index 5147d7698924..6bf7cbe2e72d 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -237,20 +237,11 @@ clocks = <&socplldiv2 0>; reg = <0x0 0x1f21c000 0x0 0x1000>; reg-names = "csr-reg"; - csr-mask = <0x3>; + csr-mask = <0xa>; + enable-mask = <0xf>; clock-output-names = "sge0clk"; }; - sge1clk: sge1clk@1f21c000 { - compatible = "apm,xgene-device-clock"; - #clock-cells = <1>; - clocks = <&socplldiv2 0>; - reg = <0x0 0x1f21c000 0x0 0x1000>; - reg-names = "csr-reg"; - csr-mask = <0xc>; - clock-output-names = "sge1clk"; - }; - xge0clk: xge0clk@1f61c000 { compatible = "apm,xgene-device-clock"; #clock-cells = <1>; @@ -921,6 +912,14 @@ clocks = <&rtcclk 0>; }; + mdio: mdio@17020000 { + compatible = "apm,xgene-mdio-rgmii"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x17020000 0x0 0xd100>; + clocks = <&menetclk 0>; + }; + menet: ethernet@17020000 { compatible = "apm,xgene-enet"; status = "disabled"; @@ -934,7 +933,7 @@ /* mac address will be overwritten by the bootloader */ local-mac-address = [00 00 00 00 00 00]; phy-connection-type = "rgmii"; - phy-handle = <&menetphy>; + phy-handle = <&menet0phy>,<&menetphy>; mdio { compatible = "apm,xgene-mdio"; #address-cells = <1>; @@ -960,6 +959,7 @@ clocks = <&sge0clk 0>; local-mac-address = [00 00 00 00 00 00]; phy-connection-type = "sgmii"; + phy-handle = <&sgenet0phy>; }; sgenet1: ethernet@1f210030 { @@ -973,9 +973,9 @@ <0x0 0xAD 0x4>; port-id = <1>; dma-coherent; - clocks = <&sge1clk 0>; local-mac-address = [00 00 00 00 00 00]; phy-connection-type = "sgmii"; + phy-handle = <&sgenet1phy>; }; xgenet: ethernet@1f610000 { diff --git a/arch/arm64/boot/dts/broadcom/ns2-svk.dts 
b/arch/arm64/boot/dts/broadcom/ns2-svk.dts index 54ca40c9f711..ea5603fd106a 100644 --- a/arch/arm64/boot/dts/broadcom/ns2-svk.dts +++ b/arch/arm64/boot/dts/broadcom/ns2-svk.dts @@ -52,6 +52,14 @@ }; }; +&pci_phy0 { + status = "ok"; +}; + +&pci_phy1 { + status = "ok"; +}; + &pcie0 { status = "ok"; }; @@ -132,3 +140,11 @@ #size-cells = <1>; }; }; + +&mdio_mux_iproc { + mdio@10 { + gphy0: eth-phy@10 { + reg = <0x10>; + }; + }; +}; diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi index ec68ec1a80c8..46b78fa89f4c 100644 --- a/arch/arm64/boot/dts/broadcom/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi @@ -263,6 +263,45 @@ IRQ_TYPE_LEVEL_HIGH)>; }; + mdio_mux_iproc: mdio-mux@6602023c { + compatible = "brcm,mdio-mux-iproc"; + reg = <0x6602023c 0x14>; + #address-cells = <1>; + #size-cells = <0>; + + mdio@0 { + reg = <0x0>; + #address-cells = <1>; + #size-cells = <0>; + + pci_phy0: pci-phy@0 { + compatible = "brcm,ns2-pcie-phy"; + reg = <0x0>; + #phy-cells = <0>; + status = "disabled"; + }; + }; + + mdio@7 { + reg = <0x7>; + #address-cells = <1>; + #size-cells = <0>; + + pci_phy1: pci-phy@0 { + compatible = "brcm,ns2-pcie-phy"; + reg = <0x0>; + #phy-cells = <0>; + status = "disabled"; + }; + }; + + mdio@10 { + reg = <0x10>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; + timer0: timer@66030000 { compatible = "arm,sp804", "arm,primecell"; reg = <0x66030000 0x1000>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts index f895fc02ab06..40846319be69 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts @@ -49,6 +49,10 @@ / { model = "LS1043A RDB Board"; + + aliases { + crypto = &crypto; + }; }; &i2c0 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index de0323b48b1e..6bd46c133010 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -159,6 +159,49 @@ big-endian; }; + crypto: crypto@1700000 { + compatible = "fsl,sec-v5.4", "fsl,sec-v5.0", + "fsl,sec-v4.0"; + fsl,sec-era = <3>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x00 0x1700000 0x100000>; + reg = <0x00 0x1700000 0x0 0x100000>; + interrupts = <0 75 0x4>; + + sec_jr0: jr@10000 { + compatible = "fsl,sec-v5.4-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x10000 0x10000>; + interrupts = <0 71 0x4>; + }; + + sec_jr1: jr@20000 { + compatible = "fsl,sec-v5.4-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x20000 0x10000>; + interrupts = <0 72 0x4>; + }; + + sec_jr2: jr@30000 { + compatible = "fsl,sec-v5.4-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x30000 0x10000>; + interrupts = <0 73 0x4>; + }; + + sec_jr3: jr@40000 { + compatible = "fsl,sec-v5.4-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x40000 0x10000>; + interrupts = <0 74 0x4>; + }; + }; + dcfg: dcfg@1ee0000 { compatible = "fsl,ls1043a-dcfg", "syscon"; reg = <0x0 0x1ee0000 0x0 0x10000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index 05f89c4a5413..77b8c4e388ca 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -168,6 +168,18 @@ }; }; + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + vpu_dma_reserved: vpu_dma_mem_region { + 
compatible = "shared-dma-pool"; + reg = <0 0xb7000000 0 0x500000>; + alignment = <0x1000>; + no-map; + }; + }; + timer { compatible = "arm,armv8-timer"; interrupt-parent = <&gic>; @@ -312,6 +324,17 @@ clock-names = "spi", "wrap"; }; + vpu: vpu@10020000 { + compatible = "mediatek,mt8173-vpu"; + reg = <0 0x10020000 0 0x30000>, + <0 0x10050000 0 0x100>; + reg-names = "tcm", "cfg_reg"; + interrupts = ; + clocks = <&topckgen CLK_TOP_SCP_SEL>; + clock-names = "main"; + memory-region = <&vpu_dma_reserved>; + }; + sysirq: intpol-controller@10200620 { compatible = "mediatek,mt8173-sysirq", "mediatek,mt6577-sysirq"; @@ -754,6 +777,45 @@ clock-names = "apb", "smi"; }; + vcodec_enc: vcodec@18002000 { + compatible = "mediatek,mt8173-vcodec-enc"; + reg = <0 0x18002000 0 0x1000>, /* VENC_SYS */ + <0 0x19002000 0 0x1000>; /* VENC_LT_SYS */ + interrupts = , + ; + mediatek,larb = <&larb3>, + <&larb5>; + iommus = <&iommu M4U_PORT_VENC_RCPU>, + <&iommu M4U_PORT_VENC_REC>, + <&iommu M4U_PORT_VENC_BSDMA>, + <&iommu M4U_PORT_VENC_SV_COMV>, + <&iommu M4U_PORT_VENC_RD_COMV>, + <&iommu M4U_PORT_VENC_CUR_LUMA>, + <&iommu M4U_PORT_VENC_CUR_CHROMA>, + <&iommu M4U_PORT_VENC_REF_LUMA>, + <&iommu M4U_PORT_VENC_REF_CHROMA>, + <&iommu M4U_PORT_VENC_NBM_RDMA>, + <&iommu M4U_PORT_VENC_NBM_WDMA>, + <&iommu M4U_PORT_VENC_RCPU_SET2>, + <&iommu M4U_PORT_VENC_REC_FRM_SET2>, + <&iommu M4U_PORT_VENC_BSDMA_SET2>, + <&iommu M4U_PORT_VENC_SV_COMA_SET2>, + <&iommu M4U_PORT_VENC_RD_COMA_SET2>, + <&iommu M4U_PORT_VENC_CUR_LUMA_SET2>, + <&iommu M4U_PORT_VENC_CUR_CHROMA_SET2>, + <&iommu M4U_PORT_VENC_REF_LUMA_SET2>, + <&iommu M4U_PORT_VENC_REC_CHROMA_SET2>; + mediatek,vpu = <&vpu>; + clocks = <&topckgen CLK_TOP_VENCPLL_D2>, + <&topckgen CLK_TOP_VENC_SEL>, + <&topckgen CLK_TOP_UNIVPLL1_D2>, + <&topckgen CLK_TOP_VENC_LT_SEL>; + clock-names = "venc_sel_src", + "venc_sel", + "venc_lt_sel_src", + "venc_lt_sel"; + }; + vencltsys: clock-controller@19000000 { compatible = "mediatek,mt8173-vencltsys", "syscon"; reg = <0 0x19000000 0 0x1000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index d7f8e06910bc..188bbeab92b9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -492,6 +492,14 @@ interrupts = ; }; + rktimer: rktimer@ff850000 { + compatible = "rockchip,rk3399-timer"; + reg = <0x0 0xff850000 0x0 0x1000>; + interrupts = ; + clocks = <&cru PCLK_TIMER0>, <&cru SCLK_TIMER00>; + clock-names = "pclk", "timer"; + }; + spdif: spdif@ff870000 { compatible = "rockchip,rk3399-spdif"; reg = <0x0 0xff870000 0x0 0x1000>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index fd2d74d0491e..4ed4756dfa97 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -70,6 +70,7 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA=y CONFIG_XEN=y +CONFIG_KEXEC=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y CONFIG_CPU_IDLE=y diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index cff532a6744e..f43d2c44c765 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += bug.h generic-y += bugs.h -generic-y += checksum.h generic-y += clkdev.h generic-y += cputime.h generic-y += current.h diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index aee323b13802..5420cb0fcb3e 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -113,4 +113,14 @@ static inline const char 
*acpi_get_enable_method(int cpu) pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr); #endif +#ifdef CONFIG_ACPI_NUMA +int arm64_acpi_numa_init(void); +int acpi_numa_get_nid(unsigned int cpu, u64 hwid); +#else +static inline int arm64_acpi_numa_init(void) { return -ENOSYS; } +static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return NUMA_NO_NODE; } +#endif /* CONFIG_ACPI_NUMA */ + +#define ACPI_TABLE_UPGRADE_MAX_PHYS MEMBLOCK_ALLOC_ACCESSIBLE + #endif /*_ASM_ACPI_H*/ diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index beccbdefa106..8746ff6abd77 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -95,13 +95,11 @@ void apply_alternatives(void *start, size_t length); * The code that follows this macro will be assembled and linked as * normal. There are no restrictions on this code. */ -.macro alternative_if_not cap, enable = 1 - .if \enable +.macro alternative_if_not cap .pushsection .altinstructions, "a" altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f .popsection 661: - .endif .endm /* @@ -118,27 +116,27 @@ void apply_alternatives(void *start, size_t length); * alternative sequence it is defined in (branches into an * alternative sequence are not fixed up). */ -.macro alternative_else, enable = 1 - .if \enable +.macro alternative_else 662: .pushsection .altinstr_replacement, "ax" 663: - .endif .endm /* * Complete an alternative code sequence. */ -.macro alternative_endif, enable = 1 - .if \enable +.macro alternative_endif 664: .popsection .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) - .endif .endm #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) +.macro user_alt, label, oldinstr, newinstr, cond +9999: alternative_insn "\oldinstr", "\newinstr", \cond + _ASM_EXTABLE 9999b, \label +.endm /* * Generate the assembly for UAO alternatives with exception table entries. 
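Stepping back from the alternative.h hunk above: each alternative_if_not/alternative_else pair emits a record into the .altinstructions section, and apply_alternatives() later patches the replacement sequence over the original wherever the named CPU capability is set. A standalone mock of that walk, assuming the self-relative struct alt_instr layout arm64 uses; cpus_have_cap() is stubbed and the byte buffers stand in for instructions:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct alt_instr {
	int32_t  orig_offset;	/* offset to original instruction(s) */
	int32_t  alt_offset;	/* offset to replacement instruction(s) */
	uint16_t cpufeature;	/* capability selecting the replacement */
	uint8_t  orig_len;	/* size of the original sequence */
	uint8_t  alt_len;	/* size of the replacement sequence */
};

static int cpus_have_cap(uint16_t cap) { return cap == 1; }	/* stub */

static void apply_alternatives(struct alt_instr *alt, size_t nr)
{
	for (size_t i = 0; i < nr; i++) {
		if (!cpus_have_cap(alt[i].cpufeature))
			continue;
		/* Offsets are relative to the fields that hold them. */
		uint8_t *orig = (uint8_t *)((uintptr_t)&alt[i].orig_offset +
					    alt[i].orig_offset);
		uint8_t *repl = (uint8_t *)((uintptr_t)&alt[i].alt_offset +
					    alt[i].alt_offset);
		memcpy(orig, repl, alt[i].alt_len); /* kernel then flushes I-cache */
	}
}

int main(void)
{
	uint8_t text[4] = { 0xaa, 0xaa, 0xaa, 0xaa };	/* stand-in "insn" */
	uint8_t repl[4] = { 0xbb, 0xbb, 0xbb, 0xbb };
	struct alt_instr a;

	a.orig_offset = (int32_t)((uintptr_t)text - (uintptr_t)&a.orig_offset);
	a.alt_offset = (int32_t)((uintptr_t)repl - (uintptr_t)&a.alt_offset);
	a.cpufeature = 1;
	a.orig_len = a.alt_len = 4;

	apply_alternatives(&a, 1);
	printf("first byte now: 0x%02x\n", text[0]);	/* prints 0xbb */
	return 0;
}
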
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 10b017c4bdd8..d5025c69ca81 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -24,6 +24,7 @@ #define __ASM_ASSEMBLER_H #include +#include #include #include #include @@ -261,7 +262,16 @@ lr .req x30 // link register add \size, \kaddr, \size sub \tmp2, \tmp1, #1 bic \kaddr, \kaddr, \tmp2 -9998: dc \op, \kaddr +9998: + .if (\op == cvau || \op == cvac) +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \kaddr +alternative_else + dc civac, \kaddr +alternative_endif + .else + dc \op, \kaddr + .endif add \kaddr, \kaddr, \tmp1 cmp \kaddr, \size b.lo 9998b diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index f3a3586a421c..c0235e0ff849 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -76,6 +76,36 @@ #define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) #define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_add_acquire atomic_fetch_add_acquire +#define atomic_fetch_add_release atomic_fetch_add_release +#define atomic_fetch_add atomic_fetch_add + +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed +#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#define atomic_fetch_sub_release atomic_fetch_sub_release +#define atomic_fetch_sub atomic_fetch_sub + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_and_acquire atomic_fetch_and_acquire +#define atomic_fetch_and_release atomic_fetch_and_release +#define atomic_fetch_and atomic_fetch_and + +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#define atomic_fetch_andnot atomic_fetch_andnot + +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_or_acquire atomic_fetch_or_acquire +#define atomic_fetch_or_release atomic_fetch_or_release +#define atomic_fetch_or atomic_fetch_or + +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed +#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#define atomic_fetch_xor_release atomic_fetch_xor_release +#define atomic_fetch_xor atomic_fetch_xor + #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) #define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new)) #define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new)) @@ -125,6 +155,36 @@ #define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#define atomic64_fetch_add_release atomic64_fetch_add_release +#define atomic64_fetch_add atomic64_fetch_add + +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed +#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#define atomic64_fetch_sub_release atomic64_fetch_sub_release +#define atomic64_fetch_sub atomic64_fetch_sub + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#define atomic64_fetch_and_release atomic64_fetch_and_release +#define atomic64_fetch_and atomic64_fetch_and + +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define 
atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#define atomic64_fetch_andnot atomic64_fetch_andnot + +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#define atomic64_fetch_or_release atomic64_fetch_or_release +#define atomic64_fetch_or atomic64_fetch_or + +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed +#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#define atomic64_fetch_xor_release atomic64_fetch_xor_release +#define atomic64_fetch_xor atomic64_fetch_xor + #define atomic64_xchg_relaxed atomic_xchg_relaxed #define atomic64_xchg_acquire atomic_xchg_acquire #define atomic64_xchg_release atomic_xchg_release diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index f61c84f6ba02..f819fdcff1ac 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -77,26 +77,57 @@ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \ } \ __LL_SC_EXPORT(atomic_##op##_return##name); +#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE int \ +__LL_SC_PREFIX(atomic_fetch_##op##name(int i, atomic_t *v)) \ +{ \ + unsigned long tmp; \ + int val, result; \ + \ + asm volatile("// atomic_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %w0, %3\n" \ +" " #asm_op " %w1, %w0, %w4\n" \ +" st" #rel "xr %w2, %w1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic_fetch_##op##name); + #define ATOMIC_OPS(...) \ ATOMIC_OP(__VA_ARGS__) \ - ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC_OPS_RLX(...) \ - ATOMIC_OPS(__VA_ARGS__) \ + ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)\ ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\ ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\ - ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__) + ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OPS_RLX(add, add) -ATOMIC_OPS_RLX(sub, sub) +ATOMIC_OPS(add, add) +ATOMIC_OPS(sub, sub) + +#undef ATOMIC_OPS +#define ATOMIC_OPS(...) 
\ + ATOMIC_OP(__VA_ARGS__) \ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, orr) -ATOMIC_OP(xor, eor) +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, orr) +ATOMIC_OPS(xor, eor) -#undef ATOMIC_OPS_RLX #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -140,26 +171,57 @@ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \ } \ __LL_SC_EXPORT(atomic64_##op##_return##name); +#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE long \ +__LL_SC_PREFIX(atomic64_fetch_##op##name(long i, atomic64_t *v)) \ +{ \ + long result, val; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %0, %3\n" \ +" " #asm_op " %1, %0, %4\n" \ +" st" #rel "xr %w2, %1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic64_fetch_##op##name); + #define ATOMIC64_OPS(...) \ ATOMIC64_OP(__VA_ARGS__) \ - ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC64_OPS_RLX(...) \ - ATOMIC64_OPS(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) \ ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \ ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \ - ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) + ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OPS_RLX(add, add) -ATOMIC64_OPS_RLX(sub, sub) +ATOMIC64_OPS(add, add) +ATOMIC64_OPS(sub, sub) + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(...) 
\ + ATOMIC64_OP(__VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, orr) -ATOMIC64_OP(xor, eor) +ATOMIC64_OPS(and, and) +ATOMIC64_OPS(andnot, bic) +ATOMIC64_OPS(or, orr) +ATOMIC64_OPS(xor, eor) -#undef ATOMIC64_OPS_RLX #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 39c1d340fec5..b5890be8f257 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -26,54 +26,57 @@ #endif #define __LL_SC_ATOMIC(op) __LL_SC_CALL(atomic_##op) - -static inline void atomic_andnot(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot), - " stclr %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_OP(op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op), \ +" " #asm_op " %w[i], %[v]\n") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic_or(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +ATOMIC_OP(andnot, stclr) +ATOMIC_OP(or, stset) +ATOMIC_OP(xor, steor) +ATOMIC_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or), - " stset %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +#undef ATOMIC_OP -static inline void atomic_xor(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor), - " steor %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline int atomic_fetch_##op##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ } -static inline void atomic_add(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +#define ATOMIC_FETCH_OPS(op, asm_op) \ + ATOMIC_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC_FETCH_OP( , al, op, asm_op, "memory") - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add), - " stadd %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +ATOMIC_FETCH_OPS(andnot, ldclr) +ATOMIC_FETCH_OPS(or, ldset) +ATOMIC_FETCH_OPS(xor, ldeor) +ATOMIC_FETCH_OPS(add, ldadd) + +#undef ATOMIC_FETCH_OP +#undef ATOMIC_FETCH_OPS #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) 
\ static inline int atomic_add_return##name(int i, atomic_t *v) \ @@ -119,6 +122,33 @@ static inline void atomic_and(int i, atomic_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \ +static inline int atomic_fetch_and##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %w[i], %w[i]\n" \ + " ldclr" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +ATOMIC_FETCH_OP_AND(_relaxed, ) +ATOMIC_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC_FETCH_OP_AND(_release, l, "memory") +ATOMIC_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC_FETCH_OP_AND + static inline void atomic_sub(int i, atomic_t *v) { register int w0 asm ("w0") = i; @@ -164,57 +194,87 @@ ATOMIC_OP_SUB_RETURN(_release, l, "memory") ATOMIC_OP_SUB_RETURN( , al, "memory") #undef ATOMIC_OP_SUB_RETURN -#undef __LL_SC_ATOMIC - -#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) - -static inline void atomic64_andnot(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot), - " stclr %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \ +static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %w[i], %w[i]\n" \ + " ldadd" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ } -static inline void atomic64_or(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +ATOMIC_FETCH_OP_SUB(_relaxed, ) +ATOMIC_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC_FETCH_OP_SUB(_release, l, "memory") +ATOMIC_FETCH_OP_SUB( , al, "memory") - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or), - " stset %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#undef ATOMIC_FETCH_OP_SUB +#undef __LL_SC_ATOMIC + +#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) +#define ATOMIC64_OP(op, asm_op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \ +" " #asm_op " %[i], %[v]\n") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic64_xor(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +ATOMIC64_OP(andnot, stclr) +ATOMIC64_OP(or, stset) +ATOMIC64_OP(xor, steor) +ATOMIC64_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor), - " steor %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#undef ATOMIC64_OP + +#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) 
\ +static inline long atomic64_fetch_##op##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC64(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ } -static inline void atomic64_add(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +#define ATOMIC64_FETCH_OPS(op, asm_op) \ + ATOMIC64_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC64_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP( , al, op, asm_op, "memory") - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add), - " stadd %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +ATOMIC64_FETCH_OPS(andnot, ldclr) +ATOMIC64_FETCH_OPS(or, ldset) +ATOMIC64_FETCH_OPS(xor, ldeor) +ATOMIC64_FETCH_OPS(add, ldadd) + +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_FETCH_OPS #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ @@ -260,6 +320,33 @@ static inline void atomic64_and(long i, atomic64_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ +static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %[i], %[i]\n" \ + " ldclr" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_AND(_relaxed, ) +ATOMIC64_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC64_FETCH_OP_AND(_release, l, "memory") +ATOMIC64_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC64_FETCH_OP_AND + static inline void atomic64_sub(long i, atomic64_t *v) { register long x0 asm ("x0") = i; @@ -306,6 +393,33 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") #undef ATOMIC64_OP_SUB_RETURN +#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) 
\ +static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %[i], %[i]\n" \ + " ldadd" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_SUB(_relaxed, ) +ATOMIC64_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC64_FETCH_OP_SUB(_release, l, "memory") +ATOMIC64_FETCH_OP_SUB( , al, "memory") + +#undef ATOMIC64_FETCH_OP_SUB + static inline long atomic64_dec_if_positive(atomic64_t *v) { register long x0 asm ("x0") = (long)v; diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index dae5c49618db..4eea7f618dce 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -91,6 +91,19 @@ do { \ __u.__val; \ }) +#define smp_cond_load_acquire(ptr, cond_expr) \ +({ \ + typeof(ptr) __PTR = (ptr); \ + typeof(*ptr) VAL; \ + for (;;) { \ + VAL = smp_load_acquire(__PTR); \ + if (cond_expr) \ + break; \ + __cmpwait_relaxed(__PTR, VAL); \ + } \ + VAL; \ +}) + #include #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h new file mode 100644 index 000000000000..09f65339d66d --- /dev/null +++ b/arch/arm64/include/asm/checksum.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2016 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
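Taking stock of the atomic_ll_sc.h/atomic_lse.h rework above: the new atomic_fetch_<op>() family returns the counter's value from before the update, whereas the pre-existing <op>_return variants return the value after it; the _relaxed/_acquire/_release suffixes map onto the a/l load/store-exclusive flavours and the trailing dmb ish in the LL/SC macros. A small C11 illustration of the same contract:

#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
	atomic_int v = 5;

	/* fetch_add: the value *before* the update, as in the macros above. */
	int before = atomic_fetch_add_explicit(&v, 3, memory_order_relaxed);

	/* What an add_return-style primitive would hand back instead. */
	int after = atomic_load_explicit(&v, memory_order_relaxed);

	printf("fetch_add -> %d, add_return -> %d\n", before, after); /* 5, 8 */
	return 0;
}
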
+ */ +#ifndef __ASM_CHECKSUM_H +#define __ASM_CHECKSUM_H + +#include + +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + sum += (sum >> 16) | (sum << 16); + return ~(__force __sum16)(sum >> 16); +} +#define csum_fold csum_fold + +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + __uint128_t tmp; + u64 sum; + + tmp = *(const __uint128_t *)iph; + iph += 16; + ihl -= 4; + tmp += ((tmp >> 64) | (tmp << 64)); + sum = tmp >> 64; + do { + sum += *(const u32 *)iph; + iph += 4; + } while (--ihl); + + sum += ((sum >> 32) | (sum << 32)); + return csum_fold(sum >> 32); +} +#define ip_fast_csum ip_fast_csum + +#include + +#endif /* __ASM_CHECKSUM_H */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 510c7b404454..bd86a79491bc 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb) __ret; \ }) +#define __CMPWAIT_CASE(w, sz, name) \ +static inline void __cmpwait_case_##name(volatile void *ptr, \ + unsigned long val) \ +{ \ + unsigned long tmp; \ + \ + asm volatile( \ + " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ + " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ + " cbnz %" #w "[tmp], 1f\n" \ + " wfe\n" \ + "1:" \ + : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \ + : [val] "r" (val)); \ +} + +__CMPWAIT_CASE(w, b, 1); +__CMPWAIT_CASE(w, h, 2); +__CMPWAIT_CASE(w, , 4); +__CMPWAIT_CASE( , , 8); + +#undef __CMPWAIT_CASE + +#define __CMPWAIT_GEN(sfx) \ +static inline void __cmpwait##sfx(volatile void *ptr, \ + unsigned long val, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __cmpwait_case##sfx##_1(ptr, (u8)val); \ + case 2: \ + return __cmpwait_case##sfx##_2(ptr, (u16)val); \ + case 4: \ + return __cmpwait_case##sfx##_4(ptr, val); \ + case 8: \ + return __cmpwait_case##sfx##_8(ptr, val); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ +} + +__CMPWAIT_GEN() + +#undef __CMPWAIT_GEN + +#define __cmpwait_relaxed(ptr, val) \ + __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr))) + #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 13a6103130cd..889226b4c6e1 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -25,10 +25,12 @@ */ struct cpuinfo_arm64 { struct cpu cpu; + struct kobject kobj; u32 reg_ctr; u32 reg_cntfrq; u32 reg_dczid; u32 reg_midr; + u32 reg_revidr; u64 reg_id_aa64dfr0; u64 reg_id_aa64dfr1; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 224efe730e46..49dd1bd3ea50 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -191,7 +191,9 @@ void __init setup_cpu_features(void); void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); +void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps); void check_local_cpu_errata(void); +void __init enable_errata_workarounds(void); void verify_local_cpu_errata(void); void verify_local_cpu_capabilities(void); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 87e1985f3be8..9d9fd4b9a72e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -80,12 +80,14 @@ #define APM_CPU_PART_POTENZA 0x000 #define CAVIUM_CPU_PART_THUNDERX 0x0A1 +#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 #define BRCM_CPU_PART_VULCAN 0x516 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) 
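On the smp_cond_load_acquire()/__cmpwait_relaxed() pair introduced above: the idiom is to reload a location with acquire ordering until a condition holds, and between rounds to park the CPU in WFE (armed by the load-exclusive in __cmpwait) instead of spinning hot. A portable C11 analogue of the shape, minus the WFE doze, which has no portable equivalent:

#include <stdatomic.h>

/* Spin until *p equals want; the final load has acquire ordering, so
 * everything published before the matching release store is visible. */
static unsigned long cond_load_acquire(_Atomic unsigned long *p,
				       unsigned long want)
{
	unsigned long v;

	for (;;) {
		v = atomic_load_explicit(p, memory_order_acquire);
		if (v == want)
			break;
		/* arm64 parks here: LDXR arms the exclusive monitor,
		 * then WFE sleeps until the cache line is written. */
	}
	return v;
}
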
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) +#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 2fcb9b7c876c..4b6b3f72a215 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -66,6 +66,11 @@ #define CACHE_FLUSH_IS_SAFE 1 +/* kprobes BRK opcodes with ESR encoding */ +#define BRK64_ESR_MASK 0xFFFF +#define BRK64_ESR_KPROBES 0x0004 +#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5)) + /* AArch32 */ #define DBG_ESR_EVT_BKPT 0x4 #define DBG_ESR_EVT_VECC 0x5 diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 622db3c6474e..a9e54aad15ef 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,8 +14,7 @@ extern void efi_init(void); #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); - -#define efi_set_mapping_permissions efi_create_mapping +int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_setup() \ ({ \ @@ -23,10 +22,10 @@ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); efi_virtmap_load(); \ }) -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ efi_##f##_t *__f; \ - __f = efi.systab->runtime->f; \ + __f = p->f; \ __f(args); \ }) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 77eeb2cc648f..f772e15c4766 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -74,6 +74,7 @@ #define ESR_ELx_EC_SHIFT (26) #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) +#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) #define ESR_ELx_IL (UL(1) << 25) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 30e50eb54a67..1dbaa901d7e5 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -120,6 +120,29 @@ enum aarch64_insn_register { AARCH64_INSN_REG_SP = 31 /* Stack pointer: as load/store base reg */ }; +enum aarch64_insn_special_register { + AARCH64_INSN_SPCLREG_SPSR_EL1 = 0xC200, + AARCH64_INSN_SPCLREG_ELR_EL1 = 0xC201, + AARCH64_INSN_SPCLREG_SP_EL0 = 0xC208, + AARCH64_INSN_SPCLREG_SPSEL = 0xC210, + AARCH64_INSN_SPCLREG_CURRENTEL = 0xC212, + AARCH64_INSN_SPCLREG_DAIF = 0xDA11, + AARCH64_INSN_SPCLREG_NZCV = 0xDA10, + AARCH64_INSN_SPCLREG_FPCR = 0xDA20, + AARCH64_INSN_SPCLREG_DSPSR_EL0 = 0xDA28, + AARCH64_INSN_SPCLREG_DLR_EL0 = 0xDA29, + AARCH64_INSN_SPCLREG_SPSR_EL2 = 0xE200, + AARCH64_INSN_SPCLREG_ELR_EL2 = 0xE201, + AARCH64_INSN_SPCLREG_SP_EL1 = 0xE208, + AARCH64_INSN_SPCLREG_SPSR_INQ = 0xE218, + AARCH64_INSN_SPCLREG_SPSR_ABT = 0xE219, + AARCH64_INSN_SPCLREG_SPSR_UND = 0xE21A, + AARCH64_INSN_SPCLREG_SPSR_FIQ = 0xE21B, + AARCH64_INSN_SPCLREG_SPSR_EL3 = 0xF200, + AARCH64_INSN_SPCLREG_ELR_EL3 = 0xF201, + AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210 +}; + enum aarch64_insn_variant { AARCH64_INSN_VARIANT_32BIT, AARCH64_INSN_VARIANT_64BIT @@ -223,8 +246,15 @@ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \ { return (val); } +__AARCH64_INSN_FUNCS(adr_adrp, 0x1F000000, 0x10000000) +__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 
0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) +__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) +__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) +__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) +__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) +__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) __AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000) __AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000) __AARCH64_INSN_FUNCS(stp_pre, 0x7FC00000, 0x29800000) @@ -273,10 +303,15 @@ __AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001) __AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002) __AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003) __AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000) +__AARCH64_INSN_FUNCS(exception, 0xFF000000, 0xD4000000) __AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F) __AARCH64_INSN_FUNCS(br, 0xFFFFFC1F, 0xD61F0000) __AARCH64_INSN_FUNCS(blr, 0xFFFFFC1F, 0xD63F0000) __AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000) +__AARCH64_INSN_FUNCS(eret, 0xFFFFFFFF, 0xD69F03E0) +__AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000) +__AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F) +__AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) #undef __AARCH64_INSN_FUNCS @@ -286,6 +321,8 @@ bool aarch64_insn_is_branch_imm(u32 insn); int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); +bool aarch64_insn_uses_literal(u32 insn); +bool aarch64_insn_is_branch(u32 insn); u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn); u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, u32 insn, u64 imm); @@ -367,9 +404,13 @@ bool aarch32_insn_is_wide(u32 insn); #define A32_RT_OFFSET 12 #define A32_RT2_OFFSET 0 +u32 aarch64_insn_extract_system_reg(u32 insn); u32 aarch32_insn_extract_reg_num(u32 insn, int offset); u32 aarch32_insn_mcr_extract_opc2(u32 insn); u32 aarch32_insn_mcr_extract_crm(u32 insn); + +typedef bool (pstate_check_t)(unsigned long); +extern pstate_check_t * const aarch32_opcode_cond_checks[16]; #endif /* __ASSEMBLY__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 44be1e03ed65..9b6e408cfa51 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -174,13 +174,15 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define iounmap __iounmap /* - * io{read,write}{16,32}be() macros + * io{read,write}{16,32,64}be() macros */ #define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; }) #define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; }) +#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(); __v; }) #define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); }) #define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); }) +#define iowrite64be(v,p) ({ __iowmb(); __raw_writeq((__force __u64)cpu_to_be64(v), p); }) /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 11cc941bd107..8c581281fa12 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -110,8 +110,5 @@ static inline int arch_irqs_disabled_flags(unsigned long flags) : : "r" (flags) : 
"memory"); \ } while (0) -#define local_dbg_enable() asm("msr daifclr, #8" : : : "memory") -#define local_dbg_disable() asm("msr daifset, #8" : : : "memory") - #endif #endif diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h new file mode 100644 index 000000000000..04744dc5fb61 --- /dev/null +++ b/arch/arm64/include/asm/kexec.h @@ -0,0 +1,48 @@ +/* + * kexec for arm64 + * + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ARM64_KEXEC_H +#define _ARM64_KEXEC_H + +/* Maximum physical address we can use pages from */ + +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) + +/* Maximum address we can reach in physical address mode */ + +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + +/* Maximum address we can use for the control code buffer */ + +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) + +#define KEXEC_CONTROL_PAGE_SIZE 4096 + +#define KEXEC_ARCH KEXEC_ARCH_AARCH64 + +#ifndef __ASSEMBLY__ + +/** + * crash_setup_regs() - save registers for the panic kernel + * + * @newregs: registers are saved here + * @oldregs: registers to be saved (may be %NULL) + */ + +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + /* Empty routine needed to avoid build errors. */ +} + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h new file mode 100644 index 000000000000..61b49150dfa3 --- /dev/null +++ b/arch/arm64/include/asm/kprobes.h @@ -0,0 +1,62 @@ +/* + * arch/arm64/include/asm/kprobes.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#ifndef _ARM_KPROBES_H +#define _ARM_KPROBES_H + +#include +#include +#include + +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 1 +#define MAX_STACK_SIZE 128 + +#define flush_insn_slot(p) do { } while (0) +#define kretprobe_blacklist_size 0 + +#include + +struct prev_kprobe { + struct kprobe *kp; + unsigned int status; +}; + +/* Single step context for kprobe */ +struct kprobe_step_ctx { + unsigned long ss_pending; + unsigned long match_addr; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned int kprobe_status; + unsigned long saved_irqflag; + struct prev_kprobe prev_kprobe; + struct kprobe_step_ctx ss_ctx; + struct pt_regs jprobe_saved_regs; + char jprobes_stack[MAX_STACK_SIZE]; +}; + +void arch_remove_kprobe(struct kprobe *); +int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); +int kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr); +int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr); +void kretprobe_trampoline(void); +void __kprobes *trampoline_probe_handler(struct pt_regs *regs); + +#endif /* _ARM_KPROBES_H */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 40bc1681b6d5..4cdeae3b17c6 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -210,7 +210,7 @@ static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) >> ESR_ELx_EC_SHIFT; + return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); } static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 97b1d8f26b9c..8d9fce037b2f 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -34,7 +34,7 @@ extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, - pgprot_t prot); + pgprot_t prot, bool allow_block_mappings); extern void *fixmap_remap_fdt(phys_addr_t dt_phys); #endif diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index e9b4f2942335..600887e491fd 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -5,6 +5,8 @@ #ifdef CONFIG_NUMA +#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2) + /* currently, arm64 implements flat NUMA topology */ #define parent_node(node) (node) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index ff98585d085a..d25f4f137c2a 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,7 +26,7 @@ #define check_pgt_cache() do { } while (0) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h new file mode 100644 index 000000000000..5af574d632fa --- /dev/null +++ b/arch/arm64/include/asm/probes.h @@ -0,0 +1,35 @@ +/* + * arch/arm64/include/asm/probes.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public 
License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef _ARM_PROBES_H +#define _ARM_PROBES_H + +#include + +struct kprobe; +struct arch_specific_insn; + +typedef u32 kprobe_opcode_t; +typedef void (kprobes_handler_t) (u32 opcode, long addr, struct pt_regs *); + +/* architecture specific copy of original instruction */ +struct arch_specific_insn { + kprobe_opcode_t *insn; + pstate_check_t *pstate_cc; + kprobes_handler_t *handler; + /* restore address after step xol */ + unsigned long restore; +}; + +#endif diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index cef1cf398356..ace0a96e7d6e 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -192,5 +192,6 @@ static inline void spin_lock_prefetch(const void *ptr) void cpu_enable_pan(void *__unused); void cpu_enable_uao(void *__unused); +void cpu_enable_cache_maint_trap(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h new file mode 100644 index 000000000000..07b8ed037dee --- /dev/null +++ b/arch/arm64/include/asm/ptdump.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
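With asm/kprobes.h and asm/probes.h in place above, consumers use the generic kprobes API unchanged. A minimal module sketch patterned on samples/kprobes/; the probed symbol is only an example and error handling is trimmed:

#include <linux/module.h>
#include <linux/kprobes.h>

static struct kprobe kp = {
	.symbol_name = "do_fork",	/* example target */
};

/* Runs once the BRK64_OPCODE_KPROBES breakpoint has trapped. */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("kprobe hit: pc = 0x%llx\n", (unsigned long long)regs->pc);
	return 0;	/* 0: continue with single-step of the original insn */
}

static int __init kprobe_example_init(void)
{
	kp.pre_handler = handler_pre;
	return register_kprobe(&kp);
}

static void __exit kprobe_example_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(kprobe_example_init);
module_exit(kprobe_example_exit);
MODULE_LICENSE("GPL");
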
+ */ +#ifndef __ASM_PTDUMP_H +#define __ASM_PTDUMP_H + +#ifdef CONFIG_ARM64_PTDUMP + +#include + +struct addr_marker { + unsigned long start_address; + char *name; +}; + +struct ptdump_info { + struct mm_struct *mm; + const struct addr_marker *markers; + unsigned long base_addr; + unsigned long max_addr; +}; + +int ptdump_register(struct ptdump_info *info, const char *name); + +#else +static inline int ptdump_register(struct ptdump_info *info, const char *name) +{ + return 0; +} +#endif /* CONFIG_ARM64_PTDUMP */ + +#endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index a307eb6e7fa8..ada08b5b036d 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -46,7 +46,6 @@ #define COMPAT_PSR_MODE_UND 0x0000001b #define COMPAT_PSR_MODE_SYS 0x0000001f #define COMPAT_PSR_T_BIT 0x00000020 -#define COMPAT_PSR_E_BIT 0x00000200 #define COMPAT_PSR_F_BIT 0x00000040 #define COMPAT_PSR_I_BIT 0x00000080 #define COMPAT_PSR_A_BIT 0x00000100 @@ -74,6 +73,7 @@ #define COMPAT_PT_DATA_ADDR 0x10004 #define COMPAT_PT_TEXT_END_ADDR 0x10008 #ifndef __ASSEMBLY__ +#include <linux/bug.h> /* sizeof(struct user) for AArch32 */ #define COMPAT_USER_SZ 296 @@ -117,8 +117,12 @@ struct pt_regs { }; u64 orig_x0; u64 syscallno; + u64 orig_addr_limit; + u64 unused; // maintain 16 byte alignment }; + +#define MAX_REG_OFFSET offsetof(struct pt_regs, pstate) + #define arch_has_single_step() (1) #ifdef CONFIG_COMPAT @@ -144,9 +148,58 @@ struct pt_regs { #define fast_interrupts_enabled(regs) \ (!((regs)->pstate & PSR_F_BIT)) -#define user_stack_pointer(regs) \ +#define GET_USP(regs) \ (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) +#define SET_USP(ptregs, value) \ + (!compat_user_mode(ptregs) ? ((ptregs)->sp = value) : ((ptregs)->compat_sp = value)) + +extern int regs_query_register_offset(const char *name); +extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n); + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which the register value is read + * @offset: byte offset of the register within struct pt_regs + * + * regs_get_register() returns the value of the register located at @offset + * within @regs. If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline u64 regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + u64 val = 0; + + WARN_ON(offset & 7); + + offset >>= 3; + switch (offset) { + case 0 ... 30: + val = regs->regs[offset]; + break; + case offsetof(struct pt_regs, sp) >> 3: + val = regs->sp; + break; + case offsetof(struct pt_regs, pc) >> 3: + val = regs->pc; + break; + case offsetof(struct pt_regs, pstate) >> 3: + val = regs->pstate; + break; + default: + val = 0; + } + + return val; +}
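The accessor pair above exists for exactly this kind of consumer; a hypothetical probe-handler fragment, where "x0" is one of the names the arm64 offset table behind regs_query_register_offset() is expected to know:

#include <linux/kprobes.h>
#include <linux/ptrace.h>

/* Log the first argument register of a probed function. */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	int off = regs_query_register_offset("x0");

	if (off >= 0)
		pr_info("x0 = 0x%llx\n",
			(unsigned long long)regs_get_register(regs, off));
	return 0;
}
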
+/* Valid only for Kernel mode traps. */ +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} + static inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->regs[0]; @@ -156,8 +209,15 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) struct task_struct; int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); -#define instruction_pointer(regs) ((unsigned long)(regs)->pc) +#define GET_IP(regs) ((unsigned long)(regs)->pc) +#define SET_IP(regs, value) ((regs)->pc = ((u64) (value))) + +#define GET_FP(ptregs) ((unsigned long)(ptregs)->regs[29]) +#define SET_FP(ptregs, value) ((ptregs)->regs[29] = ((u64) (value))) + +#include <asm-generic/ptrace.h> +#undef profile_pc extern unsigned long profile_pc(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 433e50405274..022644704a93 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -124,6 +124,18 @@ static inline void cpu_panic_kernel(void) cpu_park_loop(); } +/* + * If a secondary CPU enters the kernel but fails to come online (e.g. due + * to mismatched features) and cannot exit the kernel, we increment + * cpus_stuck_in_kernel and leave the CPU in a quiescent loop within the + * kernel text. The memory containing this loop must not be re-used for + * anything else as the 'stuck' core is executing it. + * + * This function is used to inhibit features like kexec and hibernate. + */ +bool cpus_are_stuck_in_kernel(void); + #endif /* ifndef __ASSEMBLY__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 751e901c8d37..cc06794b7346 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -98,11 +98,11 @@ SCTLR_ELx_SA | SCTLR_ELx_I) /* SCTLR_EL1 specific flags.
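The cpus_are_stuck_in_kernel() contract above is consumed as a veto by the features it names; a sketch of the kexec side, close in spirit to what an arm64 machine_kexec_prepare() can do, though illustrative rather than the actual implementation:

#include <linux/errno.h>
#include <linux/kexec.h>
#include <linux/printk.h>
#include <linux/smp.h>

int machine_kexec_prepare(struct kimage *kimage)
{
	/* A 'stuck' secondary is still executing a parking loop somewhere
	 * in this kernel's text, so that memory must stay untouched. */
	if (cpus_are_stuck_in_kernel()) {
		pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
		return -EBUSY;
	}
	return 0;
}
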
*/ +#define SCTLR_EL1_UCI (1 << 26) #define SCTLR_EL1_SPAN (1 << 23) #define SCTLR_EL1_SED (1 << 8) #define SCTLR_EL1_CP15BEN (1 << 5) - /* id_aa64isar0 */ #define ID_AA64ISAR0_RDM_SHIFT 28 #define ID_AA64ISAR0_ATOMICS_SHIFT 20 diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 0cc2f29bf9da..9cd03f3e812f 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -34,6 +34,8 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); +void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr); + #ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 9e397a542756..5e834d10b291 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -21,6 +21,7 @@ /* * User space memory access functions */ +#include #include #include @@ -256,15 +257,29 @@ do { \ -EFAULT; \ }) -extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); +extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); +static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) +{ + kasan_check_write(to, n); + return __arch_copy_from_user(to, from, n); +} + +static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) +{ + kasan_check_read(from, n); + return __arch_copy_to_user(to, from, n); +} + static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { + kasan_check_write(to, n); + if (access_ok(VERIFY_READ, from, n)) - n = __copy_from_user(to, from, n); + n = __arch_copy_from_user(to, from, n); else /* security hole - plug it */ memset(to, 0, n); return n; @@ -272,8 +287,10 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __u static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { + kasan_check_read(from, n); + if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); + n = __arch_copy_to_user(to, from, n); return n; } diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h index de66199673d7..2b9a63771eda 100644 --- a/arch/arm64/include/asm/vdso_datapage.h +++ b/arch/arm64/include/asm/vdso_datapage.h @@ -22,6 +22,8 @@ struct vdso_data { __u64 cs_cycle_last; /* Timebase at clocksource init */ + __u64 raw_time_sec; /* Raw time */ + __u64 raw_time_nsec; __u64 xtime_clock_sec; /* Kernel time */ __u64 xtime_clock_nsec; __u64 xtime_coarse_sec; /* Coarse time */ @@ -29,8 +31,10 @@ struct vdso_data { __u64 wtm_clock_sec; /* Wall to monotonic time */ __u64 wtm_clock_nsec; __u32 tb_seq_count; /* Timebase sequence counter */ - __u32 cs_mult; /* Clocksource multiplier */ - __u32 cs_shift; /* Clocksource shift */ + /* cs_* members must be adjacent and in this order (ldp accesses) */ + __u32 
cs_mono_mult; /* NTP-adjusted clocksource multiplier */ + __u32 cs_shift; /* Clocksource shift (mono = raw) */ + __u32 cs_raw_mult; /* Raw clocksource multiplier */ __u32 tz_minuteswest; /* Whacky timezone stuff */ __u32 tz_dsttime; __u32 use_syscall; diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index dcbcf8dcbefb..bbc6a8cf83f1 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -34,6 +34,11 @@ */ #define HVC_SET_VECTORS 1 +/* + * HVC_SOFT_RESTART - CPU soft reset, used by the cpu_soft_restart routine. + */ +#define HVC_SOFT_RESTART 2 + #define BOOT_CPU_MODE_EL1 (0xe11) #define BOOT_CPU_MODE_EL2 (0xe12) diff --git a/arch/arm64/include/asm/xen/xen-ops.h b/arch/arm64/include/asm/xen/xen-ops.h new file mode 100644 index 000000000000..ec154e719b11 --- /dev/null +++ b/arch/arm64/include/asm/xen/xen-ops.h @@ -0,0 +1,6 @@ +#ifndef _ASM_XEN_OPS_H +#define _ASM_XEN_OPS_H + +void xen_efi_runtime_setup(void); + +#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2173149d8954..14f7b651c787 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -26,8 +26,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o entry32.o \ - ../../arm/kernel/opcodes.o + sys_compat.o entry32.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o @@ -42,16 +41,15 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o +arm64-obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o +arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ + cpu-reset.o -obj-y += $(arm64-obj-y) vdso/ +obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) head-y := head.o extra-y += $(head-y) vmlinux.lds - -# vDSO - this must be built first to generate the symbol offsets -$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h -$(obj)/vdso/vdso-offsets.h: $(obj)/vdso diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c new file mode 100644 index 000000000000..f85149cc7c71 --- /dev/null +++ b/arch/arm64/kernel/acpi_numa.c @@ -0,0 +1,112 @@ +/* + * ACPI 5.1 based NUMA setup for ARM64 + * Lots of code was borrowed from arch/x86/mm/srat.c + * + * Copyright 2004 Andi Kleen, SuSE Labs. + * Copyright (C) 2013-2016, Linaro Ltd. + * Author: Hanjun Guo + * + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. + * + * Called from acpi_numa_init while reading the SRAT and SLIT tables. + * Assumes all memory regions belonging to a single proximity domain + * are in one chunk. Holes between them will be included in the node. + */ + +#define pr_fmt(fmt) "ACPI: NUMA: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static int cpus_in_srat; + +struct __node_cpu_hwid { + u32 node_id; /* logical node containing this CPU */ + u64 cpu_hwid; /* MPIDR for this CPU */ +}; + +static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = { +[0 ... 
NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} }; + +int acpi_numa_get_nid(unsigned int cpu, u64 hwid) +{ + int i; + + for (i = 0; i < cpus_in_srat; i++) { + if (hwid == early_node_cpu_hwid[i].cpu_hwid) + return early_node_cpu_hwid[i].node_id; + } + + return NUMA_NO_NODE; +} + +/* Callback for Proximity Domain -> ACPI processor UID mapping */ +void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) +{ + int pxm, node; + phys_cpuid_t mpidr; + + if (srat_disabled()) + return; + + if (pa->header.length < sizeof(struct acpi_srat_gicc_affinity)) { + pr_err("SRAT: Invalid SRAT header length: %d\n", + pa->header.length); + bad_srat(); + return; + } + + if (!(pa->flags & ACPI_SRAT_GICC_ENABLED)) + return; + + if (cpus_in_srat >= NR_CPUS) { + pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n", + NR_CPUS); + return; + } + + pxm = pa->proximity_domain; + node = acpi_map_pxm_to_node(pxm); + + if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { + pr_err("SRAT: Too many proximity domains %d\n", pxm); + bad_srat(); + return; + } + + mpidr = acpi_map_madt_entry(pa->acpi_processor_uid); + if (mpidr == PHYS_CPUID_INVALID) { + pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n", + pxm, pa->acpi_processor_uid); + bad_srat(); + return; + } + + early_node_cpu_hwid[cpus_in_srat].node_id = node; + early_node_cpu_hwid[cpus_in_srat].cpu_hwid = mpidr; + node_set(node, numa_nodes_parsed); + cpus_in_srat++; + pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d\n", + pxm, mpidr, node); +} + +int __init arm64_acpi_numa_init(void) +{ + int ret; + + ret = acpi_numa_init(); + if (ret) + return ret; + + return srat_disabled() ? -EINVAL : 0; +} diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 678f30b05a45..78f368039c79 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -34,8 +35,8 @@ EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); /* user mem (segment) */ -EXPORT_SYMBOL(__copy_from_user); -EXPORT_SYMBOL(__copy_to_user); +EXPORT_SYMBOL(__arch_copy_from_user); +EXPORT_SYMBOL(__arch_copy_to_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_in_user); @@ -68,6 +69,7 @@ EXPORT_SYMBOL(test_and_change_bit); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); +NOKPROBE_SYMBOL(_mcount); #endif /* arm-smccc */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c37202c0c838..5f72475e2e3b 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -316,28 +316,6 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) */ #define TYPE_SWPB (1 << 22) -/* - * Set up process info to signal segmentation fault - called on access error. 
- */ -static void set_segfault(struct pt_regs *regs, unsigned long addr) -{ - siginfo_t info; - - down_read(¤t->mm->mmap_sem); - if (find_vma(current->mm, addr) == NULL) - info.si_code = SEGV_MAPERR; - else - info.si_code = SEGV_ACCERR; - up_read(¤t->mm->mmap_sem); - - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_addr = (void *) instruction_pointer(regs); - - pr_debug("SWP{B} emulation: access caused memory abort!\n"); - arm64_notify_die("Illegal memory access", regs, &info, 0); -} - static int emulate_swpX(unsigned int address, unsigned int *data, unsigned int type) { @@ -366,6 +344,21 @@ static int emulate_swpX(unsigned int address, unsigned int *data, return res; } +#define ARM_OPCODE_CONDITION_UNCOND 0xf + +static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr) +{ + u32 cc_bits = opcode >> 28; + + if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) { + if ((*aarch32_opcode_cond_checks[cc_bits])(psr)) + return ARM_OPCODE_CONDTEST_PASS; + else + return ARM_OPCODE_CONDTEST_FAIL; + } + return ARM_OPCODE_CONDTEST_UNCOND; +} + /* * swp_handler logs the id of calling process, dissects the instruction, sanity * checks the memory location, calls emulate_swpX for the actual operation and @@ -380,7 +373,7 @@ static int swp_handler(struct pt_regs *regs, u32 instr) type = instr & TYPE_SWPB; - switch (arm_check_condition(instr, regs->pstate)) { + switch (aarch32_check_condition(instr, regs->pstate)) { case ARM_OPCODE_CONDTEST_PASS: break; case ARM_OPCODE_CONDTEST_FAIL: @@ -430,7 +423,8 @@ ret: return 0; fault: - set_segfault(regs, address); + pr_debug("SWP{B} emulation: access caused memory abort!\n"); + arm64_notify_segfault(regs, address); return 0; } @@ -461,7 +455,7 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr) { perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); - switch (arm_check_condition(instr, regs->pstate)) { + switch (aarch32_check_condition(instr, regs->pstate)) { case ARM_OPCODE_CONDTEST_PASS: break; case ARM_OPCODE_CONDTEST_FAIL: diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f8e5d47f0880..05070b72fc28 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -51,6 +51,17 @@ int main(void) DEFINE(S_X5, offsetof(struct pt_regs, regs[5])); DEFINE(S_X6, offsetof(struct pt_regs, regs[6])); DEFINE(S_X7, offsetof(struct pt_regs, regs[7])); + DEFINE(S_X8, offsetof(struct pt_regs, regs[8])); + DEFINE(S_X10, offsetof(struct pt_regs, regs[10])); + DEFINE(S_X12, offsetof(struct pt_regs, regs[12])); + DEFINE(S_X14, offsetof(struct pt_regs, regs[14])); + DEFINE(S_X16, offsetof(struct pt_regs, regs[16])); + DEFINE(S_X18, offsetof(struct pt_regs, regs[18])); + DEFINE(S_X20, offsetof(struct pt_regs, regs[20])); + DEFINE(S_X22, offsetof(struct pt_regs, regs[22])); + DEFINE(S_X24, offsetof(struct pt_regs, regs[24])); + DEFINE(S_X26, offsetof(struct pt_regs, regs[26])); + DEFINE(S_X28, offsetof(struct pt_regs, regs[28])); DEFINE(S_LR, offsetof(struct pt_regs, regs[30])); DEFINE(S_SP, offsetof(struct pt_regs, sp)); #ifdef CONFIG_COMPAT @@ -60,6 +71,7 @@ int main(void) DEFINE(S_PC, offsetof(struct pt_regs, pc)); DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); + DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); @@ -77,6 +89,7 @@ int main(void) BLANK(); DEFINE(CLOCK_REALTIME, 
CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); @@ -84,6 +97,8 @@ int main(void) DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); BLANK(); DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); + DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); + DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec)); DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); @@ -91,7 +106,8 @@ int main(void) DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec)); DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); - DEFINE(VDSO_CS_MULT, offsetof(struct vdso_data, cs_mult)); + DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); + DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult)); DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S new file mode 100644 index 000000000000..65f42d257414 --- /dev/null +++ b/arch/arm64/kernel/cpu-reset.S @@ -0,0 +1,54 @@ +/* + * CPU reset routines + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * Copyright (C) 2015 Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +.text .pushsection .idmap.text, "ax" + +/* + * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for + * cpu_soft_restart. + * + * @el2_switch: Flag to indicate a switch to EL2 is needed. + * @entry: Location to jump to for soft reset. + * @arg0: First argument passed to @entry. + * @arg1: Second argument passed to @entry. + * @arg2: Third argument passed to @entry. + * + * Put the CPU into the same state as it would be if it had been reset, and + * branch to what would be the reset vector. It must be executed with the + * flat identity mapping. + */ +ENTRY(__cpu_soft_restart) + /* Clear sctlr_el1 flags. */ + mrs x12, sctlr_el1 + ldr x13, =SCTLR_ELx_FLAGS + bic x12, x12, x13 + msr sctlr_el1, x12 + isb + + cbz x0, 1f // el2_switch? + mov x0, #HVC_SOFT_RESTART + hvc #0 // no return + +1: mov x18, x1 // entry + mov x0, x2 // arg0 + mov x1, x3 // arg1 + mov x2, x4 // arg2 + br x18 ENDPROC(__cpu_soft_restart) + +.popsection
diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h new file mode 100644 index 000000000000..d4e9ecb264f0 --- /dev/null +++ b/arch/arm64/kernel/cpu-reset.h @@ -0,0 +1,34 @@ +/* + * CPU reset routines + * + * Copyright (C) 2015 Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ + +#ifndef _ARM64_CPU_RESET_H +#define _ARM64_CPU_RESET_H + +#include + +void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry, + unsigned long arg0, unsigned long arg1, unsigned long arg2); + +static inline void __noreturn cpu_soft_restart(unsigned long el2_switch, + unsigned long entry, unsigned long arg0, unsigned long arg1, + unsigned long arg2) +{ + typeof(__cpu_soft_restart) *restart; + + el2_switch = el2_switch && !is_kernel_in_hyp_mode() && + is_hyp_mode_available(); + restart = (void *)virt_to_phys(__cpu_soft_restart); + + cpu_install_idmap(); + restart(el2_switch, entry, arg0, arg1, arg2); + unreachable(); +} + +#endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d42789499f17..82b0fc2e637b 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -46,6 +46,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM errata 826319, 827319, 824069", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), + .enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_819472 @@ -54,6 +55,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM errata 819472", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01), + .enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_832075 @@ -98,6 +100,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_THUNDERX, 0x00, (1 << MIDR_VARIANT_SHIFT) | 1), }, + { + /* Cavium ThunderX, T81 pass 1.0 */ + .desc = "Cavium erratum 27456", + .capability = ARM64_WORKAROUND_CAVIUM_27456, + MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), + }, #endif { } @@ -127,3 +135,8 @@ void check_local_cpu_errata(void) { update_cpu_capabilities(arm64_errata, "enabling workaround for"); } + +void __init enable_errata_workarounds(void) +{ + enable_cpu_capabilities(arm64_errata); +} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 811773d1c1d0..916d27ad79c1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -913,8 +913,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, * Run through the enabled capabilities and enable() it on all active * CPUs */ -static void __init -enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) if (caps->enable && cpus_have_cap(caps->capability)) @@ -1036,6 +1035,7 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ setup_feature_capabilities(); + enable_errata_workarounds(); setup_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index e11857fce05f..75a0f8acef66 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -9,13 +9,16 @@ * published by the Free Software Foundation. 
*/ +#include +#include +#include #include #include #include #include -int __init arm_cpuidle_init(unsigned int cpu) +int arm_cpuidle_init(unsigned int cpu) { int ret = -EOPNOTSUPP; @@ -39,3 +42,18 @@ int arm_cpuidle_suspend(int index) return cpu_ops[cpu]->cpu_suspend(index); } + +#ifdef CONFIG_ACPI + +#include + +int acpi_processor_ffh_lpi_probe(unsigned int cpu) +{ + return arm_cpuidle_init(cpu); +} + +int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) +{ + return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, lpi->index); +} +#endif diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index c173d329397f..ed1b84fe6925 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -183,6 +183,123 @@ const struct seq_operations cpuinfo_op = { .show = c_show }; + +static struct kobj_type cpuregs_kobj_type = { + .sysfs_ops = &kobj_sysfs_ops, +}; + +/* + * The ARM ARM uses the phrase "32-bit register" to describe a register + * whose upper 32 bits are RES0 (per C5.1.1, ARM DDI 0487A.i), however + * no statement is made as to whether the upper 32 bits will or will not + * be made use of in future, and between ARM DDI 0487A.c and ARM DDI + * 0487A.d CLIDR_EL1 was expanded from 32-bit to 64-bit. + * + * Thus, while both MIDR_EL1 and REVIDR_EL1 are described as 32-bit + * registers, we expose them both as 64 bit values to cater for possible + * future expansion without an ABI break. + */ +#define kobj_to_cpuinfo(kobj) container_of(kobj, struct cpuinfo_arm64, kobj) +#define CPUREGS_ATTR_RO(_name, _field) \ + static ssize_t _name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ + { \ + struct cpuinfo_arm64 *info = kobj_to_cpuinfo(kobj); \ + \ + if (info->reg_midr) \ + return sprintf(buf, "0x%016x\n", info->reg_##_field); \ + else \ + return 0; \ + } \ + static struct kobj_attribute cpuregs_attr_##_name = __ATTR_RO(_name) + +CPUREGS_ATTR_RO(midr_el1, midr); +CPUREGS_ATTR_RO(revidr_el1, revidr); + +static struct attribute *cpuregs_id_attrs[] = { + &cpuregs_attr_midr_el1.attr, + &cpuregs_attr_revidr_el1.attr, + NULL +}; + +static struct attribute_group cpuregs_attr_group = { + .attrs = cpuregs_id_attrs, + .name = "identification" +}; + +static int cpuid_add_regs(int cpu) +{ + int rc; + struct device *dev; + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + + dev = get_cpu_device(cpu); + if (!dev) { + rc = -ENODEV; + goto out; + } + rc = kobject_add(&info->kobj, &dev->kobj, "regs"); + if (rc) + goto out; + rc = sysfs_create_group(&info->kobj, &cpuregs_attr_group); + if (rc) + kobject_del(&info->kobj); +out: + return rc; +} + +static int cpuid_remove_regs(int cpu) +{ + struct device *dev; + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + + dev = get_cpu_device(cpu); + if (!dev) + return -ENODEV; + if (info->kobj.parent) { + sysfs_remove_group(&info->kobj, &cpuregs_attr_group); + kobject_del(&info->kobj); + } + + return 0; +} + +static int cpuid_callback(struct notifier_block *nb, + unsigned long action, void *hcpu) +{ + int rc = 0; + unsigned long cpu = (unsigned long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + rc = cpuid_add_regs(cpu); + break; + case CPU_DEAD: + rc = cpuid_remove_regs(cpu); + break; + } + + return notifier_from_errno(rc); +} + +static int __init cpuinfo_regs_init(void) +{ + int cpu; + + cpu_notifier_register_begin(); + + for_each_possible_cpu(cpu) { + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + + kobject_init(&info->kobj, &cpuregs_kobj_type); + if (cpu_online(cpu)) + 
cpuid_add_regs(cpu); + } + __hotcpu_notifier(cpuid_callback, 0); + + cpu_notifier_register_done(); + return 0; +} static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) { unsigned int cpu = smp_processor_id(); @@ -212,6 +329,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_ctr = read_cpuid_cachetype(); info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); + info->reg_revidr = read_cpuid(REVIDR_EL1); info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); @@ -264,3 +382,5 @@ void __init cpuinfo_store_boot_cpu(void) boot_cpu_data = *info; init_cpu_features(&boot_cpu_data); } + +device_initcall(cpuinfo_regs_init); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 4fbf3c54275c..91fff48d0f57 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -48,6 +49,7 @@ static void mdscr_write(u32 mdscr) asm volatile("msr mdscr_el1, %0" :: "r" (mdscr)); local_dbg_restore(flags); } +NOKPROBE_SYMBOL(mdscr_write); static u32 mdscr_read(void) { @@ -55,6 +57,7 @@ static u32 mdscr_read(void) asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr)); return mdscr; } +NOKPROBE_SYMBOL(mdscr_read); /* * Allow root to disable self-hosted debug from userspace. @@ -103,6 +106,7 @@ void enable_debug_monitors(enum dbg_active_el el) mdscr_write(mdscr); } } +NOKPROBE_SYMBOL(enable_debug_monitors); void disable_debug_monitors(enum dbg_active_el el) { @@ -123,6 +127,7 @@ void disable_debug_monitors(enum dbg_active_el el) mdscr_write(mdscr); } } +NOKPROBE_SYMBOL(disable_debug_monitors); /* * OS lock clearing. @@ -151,7 +156,6 @@ static int debug_monitors_init(void) /* Clear the OS lock. */ on_each_cpu(clear_os_lock, NULL, 1); isb(); - local_dbg_enable(); /* Register hotplug handler. */ __register_cpu_notifier(&os_lock_nb); @@ -166,22 +170,15 @@ postcore_initcall(debug_monitors_init); */ static void set_regs_spsr_ss(struct pt_regs *regs) { - unsigned long spsr; - - spsr = regs->pstate; - spsr &= ~DBG_SPSR_SS; - spsr |= DBG_SPSR_SS; - regs->pstate = spsr; + regs->pstate |= DBG_SPSR_SS; } +NOKPROBE_SYMBOL(set_regs_spsr_ss); static void clear_regs_spsr_ss(struct pt_regs *regs) { - unsigned long spsr; - - spsr = regs->pstate; - spsr &= ~DBG_SPSR_SS; - regs->pstate = spsr; + regs->pstate &= ~DBG_SPSR_SS; } +NOKPROBE_SYMBOL(clear_regs_spsr_ss); /* EL1 Single Step Handler hooks */ static LIST_HEAD(step_hook); @@ -225,6 +222,7 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr) return retval; } +NOKPROBE_SYMBOL(call_step_hook); static void send_user_sigtrap(int si_code) { @@ -266,6 +264,10 @@ static int single_step_handler(unsigned long addr, unsigned int esr, */ user_rewind_single_step(current); } else { +#ifdef CONFIG_KPROBES + if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) + return 0; +#endif if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) return 0; @@ -279,6 +281,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } +NOKPROBE_SYMBOL(single_step_handler); /* * Breakpoint handler is re-entrant as another breakpoint can @@ -316,19 +319,28 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) return fn ? 
fn(regs, esr) : DBG_HOOK_ERROR; } +NOKPROBE_SYMBOL(call_break_hook); static int brk_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { if (user_mode(regs)) { send_user_sigtrap(TRAP_BRKPT); - } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { - pr_warning("Unexpected kernel BRK exception at EL1\n"); + } +#ifdef CONFIG_KPROBES + else if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) { + if (kprobe_breakpoint_handler(regs, esr) != DBG_HOOK_HANDLED) + return -EFAULT; + } +#endif + else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { + pr_warn("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; } return 0; } +NOKPROBE_SYMBOL(brk_handler); int aarch32_break_handler(struct pt_regs *regs) { @@ -365,6 +377,7 @@ int aarch32_break_handler(struct pt_regs *regs) send_user_sigtrap(TRAP_BRKPT); return 0; } +NOKPROBE_SYMBOL(aarch32_break_handler); static int __init debug_traps_init(void) { @@ -386,6 +399,7 @@ void user_rewind_single_step(struct task_struct *task) if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) set_regs_spsr_ss(task_pt_regs(task)); } +NOKPROBE_SYMBOL(user_rewind_single_step); void user_fastforward_single_step(struct task_struct *task) { @@ -401,6 +415,7 @@ void kernel_enable_single_step(struct pt_regs *regs) mdscr_write(mdscr_read() | DBG_MDSCR_SS); enable_debug_monitors(DBG_ACTIVE_EL1); } +NOKPROBE_SYMBOL(kernel_enable_single_step); void kernel_disable_single_step(void) { @@ -408,12 +423,14 @@ void kernel_disable_single_step(void) mdscr_write(mdscr_read() & ~DBG_MDSCR_SS); disable_debug_monitors(DBG_ACTIVE_EL1); } +NOKPROBE_SYMBOL(kernel_disable_single_step); int kernel_active_single_step(void) { WARN_ON(!irqs_disabled()); return mdscr_read() & DBG_MDSCR_SS; } +NOKPROBE_SYMBOL(kernel_active_single_step); /* ptrace API */ void user_enable_single_step(struct task_struct *task) @@ -421,8 +438,10 @@ void user_enable_single_step(struct task_struct *task) set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); set_regs_spsr_ss(task_pt_regs(task)); } +NOKPROBE_SYMBOL(user_enable_single_step); void user_disable_single_step(struct task_struct *task) { clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); } +NOKPROBE_SYMBOL(user_disable_single_step); diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 78f52488f9ff..ba9bee389fd5 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -62,13 +62,61 @@ struct screen_info screen_info __section(.data); int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { pteval_t prot_val = create_mapping_protection(md); + bool allow_block_mappings = (md->type != EFI_RUNTIME_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_DATA); + + if (!PAGE_ALIGNED(md->phys_addr) || + !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) { + /* + * If the end address of this region is not aligned to page + * size, the mapping is rounded up, and may end up sharing a + * page frame with the next UEFI memory region. If we create + * a block entry now, we may need to split it again when mapping + * the next region, and support for that is going to be removed + * from the MMU routines. So avoid block mappings altogether in + * that case. 
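+ * A concrete example of the problem: with 64 KB kernel pages, UEFI
+ * regions are only guaranteed EFI_PAGE_SIZE (4 KB) alignment, so two
+ * adjacent regions may end up sharing a single 64 KB page frame.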
+ */ + allow_block_mappings = false; + } create_pgd_mapping(mm, md->phys_addr, md->virt_addr, md->num_pages << EFI_PAGE_SHIFT, - __pgprot(prot_val | PTE_NG)); + __pgprot(prot_val | PTE_NG), allow_block_mappings); + return 0; +} + +static int __init set_permissions(pte_t *ptep, pgtable_t token, + unsigned long addr, void *data) +{ + efi_memory_desc_t *md = data; + pte_t pte = *ptep; + + if (md->attribute & EFI_MEMORY_RO) + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + if (md->attribute & EFI_MEMORY_XP) + pte = set_pte_bit(pte, __pgprot(PTE_PXN)); + set_pte(ptep, pte); return 0; } +int __init efi_set_mapping_permissions(struct mm_struct *mm, + efi_memory_desc_t *md) +{ + BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_DATA); + + /* + * Calling apply_to_page_range() is only safe on regions that are + * guaranteed to be mapped down to pages. Since we are only called + * for regions that have been mapped using efi_create_mapping() above + * (and this is checked by the generic Memory Attributes table parsing + * routines), there is no need to check that again here. + */ + return apply_to_page_range(mm, md->virt_addr, + md->num_pages << EFI_PAGE_SHIFT, + set_permissions, md); +} + static int __init arm64_dmi_init(void) { /* diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 12e8d2bcb3f9..96e4a2b64cc1 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -97,7 +98,14 @@ mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE - .endif + get_thread_info tsk + /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ + ldr x20, [tsk, #TI_ADDR_LIMIT] + str x20, [sp, #S_ORIG_ADDR_LIMIT] + mov x20, #TASK_SIZE_64 + str x20, [tsk, #TI_ADDR_LIMIT] + ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) + .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] @@ -128,6 +136,14 @@ .endm .macro kernel_exit, el + .if \el != 0 + /* Restore the task's original addr_limit. */ + ldr x20, [sp, #S_ORIG_ADDR_LIMIT] + str x20, [tsk, #TI_ADDR_LIMIT] + + /* No need to restore UAO, it will be restored from SPSR_EL1 */ + .endif + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter @@ -242,6 +258,7 @@ tsk .req x28 // current thread_info /* * Exception vectors. */ + .pushsection ".entry.text", "ax" .align 11 ENTRY(vectors) @@ -406,7 +423,6 @@ el1_irq: bl trace_hardirqs_off #endif - get_thread_info tsk irq_handler #ifdef CONFIG_PREEMPT @@ -451,7 +467,7 @@ el0_sync: cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - b.eq el0_undef + b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception @@ -532,7 +548,7 @@ el0_ia: enable_dbg_and_irq ct_user_exit mov x0, x26 - orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts + mov x1, x25 mov x2, sp bl do_mem_abort b ret_to_user @@ -579,6 +595,16 @@ el0_undef: mov x0, sp bl do_undefinstr b ret_to_user +el0_sys: + /* + * System instructions, for trapped cache maintenance instructions + */ + enable_dbg_and_irq + ct_user_exit + mov x0, x25 + mov x1, sp + bl do_sysinstr + b ret_to_user el0_dbg: /* * Debug exception handling @@ -774,6 +800,8 @@ __ni_sys_trace: bl do_ni_syscall b __sys_trace_return + .popsection // .entry.text + /* * Special system call wrappers. 
*/ diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index f8df75d740f4..21ab5df9fa76 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -236,6 +237,11 @@ int swsusp_arch_suspend(void) unsigned long flags; struct sleep_stack_data state; + if (cpus_are_stuck_in_kernel()) { + pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n"); + return -EBUSY; + } + local_dbg_save(flags); if (__cpu_suspend_enter(&state)) { diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index ce21aa88263f..26a6bf77d272 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -127,6 +128,7 @@ static u64 read_wb_reg(int reg, int n) return val; } +NOKPROBE_SYMBOL(read_wb_reg); static void write_wb_reg(int reg, int n, u64 val) { @@ -140,6 +142,7 @@ static void write_wb_reg(int reg, int n, u64 val) } isb(); } +NOKPROBE_SYMBOL(write_wb_reg); /* * Convert a breakpoint privilege level to the corresponding exception @@ -157,6 +160,7 @@ static enum dbg_active_el debug_exception_level(int privilege) return -EINVAL; } } +NOKPROBE_SYMBOL(debug_exception_level); enum hw_breakpoint_ops { HW_BREAKPOINT_INSTALL, @@ -575,6 +579,7 @@ static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable) write_wb_reg(reg, i, ctrl); } } +NOKPROBE_SYMBOL(toggle_bp_registers); /* * Debug exception handlers. @@ -654,6 +659,7 @@ unlock: return 0; } +NOKPROBE_SYMBOL(breakpoint_handler); static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) @@ -756,6 +762,7 @@ unlock: return 0; } +NOKPROBE_SYMBOL(watchpoint_handler); /* * Handle single-step exception. @@ -813,6 +820,7 @@ int reinstall_suspended_bps(struct pt_regs *regs) return !handled_exception; } +NOKPROBE_SYMBOL(reinstall_suspended_bps); /* * Context-switcher for restoring suspended breakpoints. diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 8727f4490772..d3b5f75e652e 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -71,8 +71,16 @@ el1_sync: msr vbar_el2, x1 b 9f +2: cmp x0, #HVC_SOFT_RESTART + b.ne 3f + mov x0, x2 + mov x2, x4 + mov x4, x1 + mov x1, x3 + br x4 // no return + /* Someone called kvm_call_hyp() against the hyp-stub... 
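 * so just report ARM_EXCEPTION_HYP_GONE back to the caller.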
*/ -2: mov x0, #ARM_EXCEPTION_HYP_GONE +3: mov x0, #ARM_EXCEPTION_HYP_GONE 9: eret ENDPROC(el1_sync) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 368c08290dd8..63f9432d05e8 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #define AARCH64_INSN_SF_BIT BIT(31) @@ -162,6 +163,32 @@ static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) aarch64_insn_is_nop(insn); } +bool __kprobes aarch64_insn_uses_literal(u32 insn) +{ + /* ldr/ldrsw (literal), prfm */ + + return aarch64_insn_is_ldr_lit(insn) || + aarch64_insn_is_ldrsw_lit(insn) || + aarch64_insn_is_adr_adrp(insn) || + aarch64_insn_is_prfm_lit(insn); +} + +bool __kprobes aarch64_insn_is_branch(u32 insn) +{ + /* b, bl, cb*, tb*, b.cond, br, blr */ + + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn) || + aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn) || + aarch64_insn_is_ret(insn) || + aarch64_insn_is_br(insn) || + aarch64_insn_is_blr(insn) || + aarch64_insn_is_bcond(insn); +} + /* * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a * Section B2.6.5 "Concurrent modification and execution of instructions": @@ -1175,6 +1202,14 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset) BUG(); } +/* + * Extract the Op/CR data from a msr/mrs instruction. + */ +u32 aarch64_insn_extract_system_reg(u32 insn) +{ + return (insn & 0x1FFFE0) >> 5; +} + bool aarch32_insn_is_wide(u32 insn) { return insn >= 0xe800; @@ -1200,3 +1235,101 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn) { return insn & CRM_MASK; } + +static bool __kprobes __check_eq(unsigned long pstate) +{ + return (pstate & PSR_Z_BIT) != 0; +} + +static bool __kprobes __check_ne(unsigned long pstate) +{ + return (pstate & PSR_Z_BIT) == 0; +} + +static bool __kprobes __check_cs(unsigned long pstate) +{ + return (pstate & PSR_C_BIT) != 0; +} + +static bool __kprobes __check_cc(unsigned long pstate) +{ + return (pstate & PSR_C_BIT) == 0; +} + +static bool __kprobes __check_mi(unsigned long pstate) +{ + return (pstate & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_pl(unsigned long pstate) +{ + return (pstate & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_vs(unsigned long pstate) +{ + return (pstate & PSR_V_BIT) != 0; +} + +static bool __kprobes __check_vc(unsigned long pstate) +{ + return (pstate & PSR_V_BIT) == 0; +} + +static bool __kprobes __check_hi(unsigned long pstate) +{ + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (pstate & PSR_C_BIT) != 0; +} + +static bool __kprobes __check_ls(unsigned long pstate) +{ + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (pstate & PSR_C_BIT) == 0; +} + +static bool __kprobes __check_ge(unsigned long pstate) +{ + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (pstate & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_lt(unsigned long pstate) +{ + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (pstate & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_gt(unsigned long pstate) +{ + /*PSR_N_BIT ^= PSR_V_BIT */ + unsigned long temp = pstate ^ (pstate << 3); + + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ + return (temp & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_le(unsigned long pstate) +{ + /*PSR_N_BIT ^= PSR_V_BIT */ + unsigned long temp = pstate ^ (pstate << 3); + + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ + return (temp & PSR_N_BIT) 
!= 0; +} + +static bool __kprobes __check_al(unsigned long pstate) +{ + return true; +} + +/* + * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that + * it behaves identically to 0b1110 ("al"). + */ +pstate_check_t * const aarch32_opcode_cond_checks[16] = { + __check_eq, __check_ne, __check_cs, __check_cc, + __check_mi, __check_pl, __check_vs, __check_vc, + __check_hi, __check_ls, __check_ge, __check_lt, + __check_gt, __check_le, __check_al, __check_al +}; diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index b5f063e5eff7..8c57f6496e56 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -22,6 +22,7 @@ #include #include #include +#include #include struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -230,6 +231,7 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; } +NOKPROBE_SYMBOL(kgdb_brk_fn) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) { @@ -238,12 +240,14 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) return 0; } +NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; } +NOKPROBE_SYMBOL(kgdb_step_brk_fn); static struct break_hook kgdb_brkpt_hook = { .esr_mask = 0xffffffff, diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c new file mode 100644 index 000000000000..bc96c8a7fc79 --- /dev/null +++ b/arch/arm64/kernel/machine_kexec.c @@ -0,0 +1,212 @@ +/* + * kexec for arm64 + * + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#include +#include +#include + +#include "cpu-reset.h" + +/* Global variables for the arm64_relocate_new_kernel routine. */ +extern const unsigned char arm64_relocate_new_kernel[]; +extern const unsigned long arm64_relocate_new_kernel_size; + +static unsigned long kimage_start; + +/** + * kexec_image_info - For debugging output. + */ +#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i) +static void _kexec_image_info(const char *func, int line, + const struct kimage *kimage) +{ + unsigned long i; + + pr_debug("%s:%d:\n", func, line); + pr_debug(" kexec kimage info:\n"); + pr_debug(" type: %d\n", kimage->type); + pr_debug(" start: %lx\n", kimage->start); + pr_debug(" head: %lx\n", kimage->head); + pr_debug(" nr_segments: %lu\n", kimage->nr_segments); + + for (i = 0; i < kimage->nr_segments; i++) { + pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", + i, + kimage->segment[i].mem, + kimage->segment[i].mem + kimage->segment[i].memsz, + kimage->segment[i].memsz, + kimage->segment[i].memsz / PAGE_SIZE); + } +} + +void machine_kexec_cleanup(struct kimage *kimage) +{ + /* Empty routine needed to avoid build errors. */ +} + +/** + * machine_kexec_prepare - Prepare for a kexec reboot. + * + * Called from the core kexec code when a kernel image is loaded. + * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus + * are stuck in the kernel. This avoids a panic once we hit machine_kexec(). 
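+ * (The check uses cpus_are_stuck_in_kernel(), declared in asm/smp.h
+ * earlier in this series.)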
+ */ +int machine_kexec_prepare(struct kimage *kimage) +{ + kimage_start = kimage->start; + + kexec_image_info(kimage); + + if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) { + pr_err("Can't kexec: CPUs are stuck in the kernel.\n"); + return -EBUSY; + } + + return 0; +} + +/** + * kexec_list_flush - Helper to flush the kimage list and source pages to PoC. + */ +static void kexec_list_flush(struct kimage *kimage) +{ + kimage_entry_t *entry; + + for (entry = &kimage->head; ; entry++) { + unsigned int flag; + void *addr; + + /* flush the list entries. */ + __flush_dcache_area(entry, sizeof(kimage_entry_t)); + + flag = *entry & IND_FLAGS; + if (flag == IND_DONE) + break; + + addr = phys_to_virt(*entry & PAGE_MASK); + + switch (flag) { + case IND_INDIRECTION: + /* Set entry point just before the new list page. */ + entry = (kimage_entry_t *)addr - 1; + break; + case IND_SOURCE: + /* flush the source pages. */ + __flush_dcache_area(addr, PAGE_SIZE); + break; + case IND_DESTINATION: + break; + default: + BUG(); + } + } +} + +/** + * kexec_segment_flush - Helper to flush the kimage segments to PoC. + */ +static void kexec_segment_flush(const struct kimage *kimage) +{ + unsigned long i; + + pr_debug("%s:\n", __func__); + + for (i = 0; i < kimage->nr_segments; i++) { + pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", + i, + kimage->segment[i].mem, + kimage->segment[i].mem + kimage->segment[i].memsz, + kimage->segment[i].memsz, + kimage->segment[i].memsz / PAGE_SIZE); + + __flush_dcache_area(phys_to_virt(kimage->segment[i].mem), + kimage->segment[i].memsz); + } +} + +/** + * machine_kexec - Do the kexec reboot. + * + * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC. + */ +void machine_kexec(struct kimage *kimage) +{ + phys_addr_t reboot_code_buffer_phys; + void *reboot_code_buffer; + + /* + * New cpus may have become stuck_in_kernel after we loaded the image. + */ + BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1)); + + reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); + reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys); + + kexec_image_info(kimage); + + pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__, + kimage->control_code_page); + pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__, + &reboot_code_buffer_phys); + pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__, + reboot_code_buffer); + pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__, + arm64_relocate_new_kernel); + pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n", + __func__, __LINE__, arm64_relocate_new_kernel_size, + arm64_relocate_new_kernel_size); + + /* + * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use + * after the kernel is shut down. + */ + memcpy(reboot_code_buffer, arm64_relocate_new_kernel, + arm64_relocate_new_kernel_size); + + /* Flush the reboot_code_buffer in preparation for its execution. */ + __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size); + flush_icache_range((uintptr_t)reboot_code_buffer, + (uintptr_t)reboot_code_buffer + arm64_relocate_new_kernel_size); + + /* Flush the kimage list and its buffers. */ + kexec_list_flush(kimage); + + /* Flush the new image if already in place. */ + if (kimage->head & IND_DONE) + kexec_segment_flush(kimage); + + pr_info("Bye!\n"); + + /* Disable all DAIF exceptions.
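+ * "daifset, #0xf" sets all four PSTATE mask bits, masking debug
+ * exceptions, SError, IRQs and FIQs before cpu_soft_restart() below
+ * tears down the MMU.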
*/ + asm volatile ("msr daifset, #0xf" : : : "memory"); + + /* + * cpu_soft_restart will shutdown the MMU, disable data caches, then + * transfer control to the reboot_code_buffer which contains a copy of + * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel + * uses physical addressing to relocate the new image to its final + * position and transfers control to the image entry point when the + * relocation is complete. + */ + + cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head, + kimage_start, 0); + + BUG(); /* Should never get here. */ +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + /* Empty routine needed to avoid build errors. */ +}
diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile new file mode 100644 index 000000000000..ce06312e3d34 --- /dev/null +++ b/arch/arm64/kernel/probes/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o \ + kprobes_trampoline.o \ + simulate-insn.o
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c new file mode 100644 index 000000000000..37e47a9d617e --- /dev/null +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -0,0 +1,174 @@ +/* + * arch/arm64/kernel/probes/decode-insn.c + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include + +#include "decode-insn.h" +#include "simulate-insn.h" + +static bool __kprobes aarch64_insn_is_steppable(u32 insn) +{ + /* + * Branch instructions will write a new value into the PC which is + * likely to be relative to the XOL address and therefore invalid. + * Deliberate generation of an exception during stepping is also not + * currently safe. Lastly, MSR instructions can do any number of nasty + * things we can't handle during single-stepping. + */ + if (aarch64_get_insn_class(insn) == AARCH64_INSN_CLS_BR_SYS) { + if (aarch64_insn_is_branch(insn) || + aarch64_insn_is_msr_imm(insn) || + aarch64_insn_is_msr_reg(insn) || + aarch64_insn_is_exception(insn) || + aarch64_insn_is_eret(insn)) + return false; + + /* + * The MRS instruction may not return a correct value when + * executing in the single-stepping environment. We do make one + * exception, for reading the DAIF bits. + */ + if (aarch64_insn_is_mrs(insn)) + return aarch64_insn_extract_system_reg(insn) + != AARCH64_INSN_SPCLREG_DAIF; + + /* + * The HINT instruction is problematic when single-stepping, + * except for the NOP case. + */ + if (aarch64_insn_is_hint(insn)) + return aarch64_insn_is_nop(insn); + + return true; + } + + /* + * Instructions which load PC relative literals are not going to work + * when executed from an XOL slot. Instructions doing an exclusive + * load/store are not going to complete successfully when single-step + * exception handling happens in the middle of the sequence.
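+ * (For example, the debug exception taken while stepping between a LDXR
+ * and its matching STXR can clear the local exclusive monitor, so the
+ * STXR would fail on every attempt and the loop would never complete.)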
+ */ + if (aarch64_insn_uses_literal(insn) || + aarch64_insn_is_exclusive(insn)) + return false; + + return true; +} + +/* Return: + * INSN_REJECTED If instruction is one not allowed to kprobe, + * INSN_GOOD If instruction is supported and uses instruction slot, + * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot. + */ +static enum kprobe_insn __kprobes +arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) +{ + /* + * Instructions reading or modifying the PC won't work from the XOL + * slot. + */ + if (aarch64_insn_is_steppable(insn)) + return INSN_GOOD; + + if (aarch64_insn_is_bcond(insn)) { + asi->handler = simulate_b_cond; + } else if (aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn)) { + asi->handler = simulate_cbz_cbnz; + } else if (aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn)) { + asi->handler = simulate_tbz_tbnz; + } else if (aarch64_insn_is_adr_adrp(insn)) { + asi->handler = simulate_adr_adrp; + } else if (aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn)) { + asi->handler = simulate_b_bl; + } else if (aarch64_insn_is_br(insn) || + aarch64_insn_is_blr(insn) || + aarch64_insn_is_ret(insn)) { + asi->handler = simulate_br_blr_ret; + } else if (aarch64_insn_is_ldr_lit(insn)) { + asi->handler = simulate_ldr_literal; + } else if (aarch64_insn_is_ldrsw_lit(insn)) { + asi->handler = simulate_ldrsw_literal; + } else { + /* + * Instruction cannot be stepped out-of-line and we don't + * (yet) simulate it. + */ + return INSN_REJECTED; + } + + return INSN_GOOD_NO_SLOT; +} + +static bool __kprobes +is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end) +{ + while (scan_start > scan_end) { + /* + * atomic region starts from exclusive load and ends with + * exclusive store. + */ + if (aarch64_insn_is_store_ex(le32_to_cpu(*scan_start))) + return false; + else if (aarch64_insn_is_load_ex(le32_to_cpu(*scan_start))) + return true; + scan_start--; + } + + return false; +} + +enum kprobe_insn __kprobes +arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) +{ + enum kprobe_insn decoded; + kprobe_opcode_t insn = le32_to_cpu(*addr); + kprobe_opcode_t *scan_start = addr - 1; + kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) + struct module *mod; +#endif + + if (addr >= (kprobe_opcode_t *)_text && + scan_end < (kprobe_opcode_t *)_text) + scan_end = (kprobe_opcode_t *)_text; +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) + else { + preempt_disable(); + mod = __module_address((unsigned long)addr); + if (mod && within_module_init((unsigned long)addr, mod) && + !within_module_init((unsigned long)scan_end, mod)) + scan_end = (kprobe_opcode_t *)mod->init_layout.base; + else if (mod && within_module_core((unsigned long)addr, mod) && + !within_module_core((unsigned long)scan_end, mod)) + scan_end = (kprobe_opcode_t *)mod->core_layout.base; + preempt_enable(); + } +#endif + decoded = arm_probe_decode_insn(insn, asi); + + if (decoded == INSN_REJECTED || + is_probed_address_atomic(scan_start, scan_end)) + return INSN_REJECTED; + + return decoded; +} diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h new file mode 100644 index 000000000000..d438289646a6 --- /dev/null +++ b/arch/arm64/kernel/probes/decode-insn.h @@ -0,0 +1,35 @@ +/* + * arch/arm64/kernel/probes/decode-insn.h + * + * Copyright (C) 2013 Linaro Limited. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_ARM64_H +#define _ARM_KERNEL_KPROBES_ARM64_H + +/* + * ARM strongly recommends a limit of 128 bytes between LoadExcl and + * StoreExcl instructions in a single thread of execution. So keep the + * max atomic context size as 32. + */ +#define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t)) + +enum kprobe_insn { + INSN_REJECTED, + INSN_GOOD_NO_SLOT, + INSN_GOOD, +}; + +enum kprobe_insn __kprobes +arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi); + +#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c new file mode 100644 index 000000000000..bf9768588288 --- /dev/null +++ b/arch/arm64/kernel/probes/kprobes.c @@ -0,0 +1,686 @@ +/* + * arch/arm64/kernel/probes/kprobes.c + * + * Kprobes support for ARM64 + * + * Copyright (C) 2013 Linaro Limited. + * Author: Sandeepa Prabhu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "decode-insn.h" + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); + +static inline unsigned long min_stack_size(unsigned long addr) +{ + unsigned long size; + + if (on_irq_stack(addr, raw_smp_processor_id())) + size = IRQ_STACK_PTR(raw_smp_processor_id()) - addr; + else + size = (unsigned long)current_thread_info() + THREAD_START_SP - addr; + + return min(size, FIELD_SIZEOF(struct kprobe_ctlblk, jprobes_stack)); +} + +static void __kprobes arch_prepare_ss_slot(struct kprobe *p) +{ + /* prepare insn slot */ + p->ainsn.insn[0] = cpu_to_le32(p->opcode); + + flush_icache_range((uintptr_t) (p->ainsn.insn), + (uintptr_t) (p->ainsn.insn) + + MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + + /* + * Needs restoring of return address after stepping xol. + */ + p->ainsn.restore = (unsigned long) p->addr + + sizeof(kprobe_opcode_t); +} + +static void __kprobes arch_prepare_simulate(struct kprobe *p) +{ + /* This instruction is not executed xol.
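+ * ("xol" = executed out of line, from the ainsn.insn slot); it is
+ * simulated by arch_simulate_insn() instead.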
No need to adjust the PC */ + p->ainsn.restore = 0; +} + +static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (p->ainsn.handler) + p->ainsn.handler((u32)p->opcode, (long)p->addr, regs); + + /* single step simulated, now go for post processing */ + post_kprobe_handler(kcb, regs); +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long probe_addr = (unsigned long)p->addr; + extern char __start_rodata[]; + extern char __end_rodata[]; + + if (probe_addr & 0x3) + return -EINVAL; + + /* copy instruction */ + p->opcode = le32_to_cpu(*p->addr); + + if (in_exception_text(probe_addr)) + return -EINVAL; + if (probe_addr >= (unsigned long) __start_rodata && + probe_addr <= (unsigned long) __end_rodata) + return -EINVAL; + + /* decode instruction */ + switch (arm_kprobe_decode_insn(p->addr, &p->ainsn)) { + case INSN_REJECTED: /* insn not supported */ + return -EINVAL; + + case INSN_GOOD_NO_SLOT: /* insn need simulation */ + p->ainsn.insn = NULL; + break; + + case INSN_GOOD: /* instruction uses slot */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + break; + }; + + /* prepare the instruction */ + if (p->ainsn.insn) + arch_prepare_ss_slot(p); + else + arch_prepare_simulate(p); + + return 0; +} + +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +{ + void *addrs[1]; + u32 insns[1]; + + addrs[0] = (void *)addr; + insns[0] = (u32)opcode; + + return aarch64_insn_patch_text(addrs, insns, 1); +} + +/* arm kprobe: install breakpoint in text */ +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, BRK64_OPCODE_KPROBES); +} + +/* disarm kprobe: remove breakpoint from text */ +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, p->opcode); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; +} + +static void __kprobes set_current_kprobe(struct kprobe *p) +{ + __this_cpu_write(current_kprobe, p); +} + +/* + * The D-flag (Debug mask) is set (masked) upon debug exception entry. + * Kprobes needs to clear (unmask) D-flag -ONLY- in case of recursive + * probe i.e. when probe hit from kprobe handler context upon + * executing the pre/post handlers. In this case we return with + * D-flag clear so that single-stepping can be carried-out. + * + * Leave D-flag set in all other cases. + */ +static void __kprobes +spsr_set_debug_flag(struct pt_regs *regs, int mask) +{ + unsigned long spsr = regs->pstate; + + if (mask) + spsr |= PSR_D_BIT; + else + spsr &= ~PSR_D_BIT; + + regs->pstate = spsr; +} + +/* + * Interrupts need to be disabled before single-step mode is set, and not + * reenabled until after single-step mode ends. + * Without disabling interrupt on local CPU, there is a chance of + * interrupt occurrence in the period of exception return and start of + * out-of-line single-step, that result in wrongly single stepping + * into the interrupt handler. 
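+ * A sketch of the intended save/step/restore sequence (the first three
+ * calls appear in setup_singlestep() below; the restore runs once the
+ * single-step exception has been handled):
+ *
+ *	kprobes_save_local_irqflag(kcb, regs);	// stash pstate, set PSR_I_BIT
+ *	kernel_enable_single_step(regs);
+ *	instruction_pointer_set(regs, slot);	// return to the copied insn
+ *	... single-step debug exception ...
+ *	kprobes_restore_local_irqflag(kcb, regs);	// unmask if it was unmasked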
+ */ +static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + kcb->saved_irqflag = regs->pstate; + regs->pstate |= PSR_I_BIT; +} + +static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + if (kcb->saved_irqflag & PSR_I_BIT) + regs->pstate |= PSR_I_BIT; + else + regs->pstate &= ~PSR_I_BIT; +} + +static void __kprobes +set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr) +{ + kcb->ss_ctx.ss_pending = true; + kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t); +} + +static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb) +{ + kcb->ss_ctx.ss_pending = false; + kcb->ss_ctx.match_addr = 0; +} + +static void __kprobes setup_singlestep(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) +{ + unsigned long slot; + + if (reenter) { + save_previous_kprobe(kcb); + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_REENTER; + } else { + kcb->kprobe_status = KPROBE_HIT_SS; + } + + + if (p->ainsn.insn) { + /* prepare for single stepping */ + slot = (unsigned long)p->ainsn.insn; + + set_ss_context(kcb, slot); /* mark pending ss */ + + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 0); + else + WARN_ON(regs->pstate & PSR_D_BIT); + + /* IRQs and single stepping do not mix well. */ + kprobes_save_local_irqflag(kcb, regs); + kernel_enable_single_step(regs); + instruction_pointer_set(regs, slot); + } else { + /* insn simulation */ + arch_simulate_insn(p, regs); + } +} + +static int __kprobes reenter_kprobe(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs, kcb, 1); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr); + dump_kprobe(p); + BUG(); + break; + default: + WARN_ON(1); + return 0; + } + + return 1; +} + +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + + if (!cur) + return; + + /* return addr restore if non-branching insn */ + if (cur->ainsn.restore != 0) + instruction_pointer_set(regs, cur->ainsn.restore); + + /* restore back original saved kprobe variables and continue */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + return; + } + /* call post handler */ + kcb->kprobe_status = KPROBE_HIT_SSDONE; + if (cur->post_handler) { + /* post_handler can hit breakpoint and single step + * again, so we enable D-flag for recursive exception. + */ + cur->post_handler(cur, regs, 0); + } + + reset_current_kprobe(); +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + switch (kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the ip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. 
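+ * The pending hardware single-step is also cancelled below via
+ * kernel_disable_single_step(), so no stale debug state survives
+ * the fault.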
+ */ + instruction_pointer_set(regs, (unsigned long) cur->addr); + if (!instruction_pointer(regs)) + BUG(); + + kernel_disable_single_step(); + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 1); + + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accounting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, fsr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (fixup_exception(regs)) + return 1; + } + return 0; +} + +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} + +static void __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p, *cur_kprobe; + struct kprobe_ctlblk *kcb; + unsigned long addr = instruction_pointer(regs); + + kcb = get_kprobe_ctlblk(); + cur_kprobe = kprobe_running(); + + p = get_kprobe((kprobe_opcode_t *) addr); + + if (p) { + if (cur_kprobe) { + if (reenter_kprobe(p, regs, kcb)) + return; + } else { + /* Probe hit */ + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + /* + * If we have no pre-handler or it returned 0, we + * continue with normal processing. If we have a + * pre-handler and it returned non-zero, it prepped + * for calling the break_handler below on re-entry, + * so get out doing nothing more here. + * + * pre_handler can hit a breakpoint and can step thru + * before return, keep PSTATE D-flag enabled until + * pre_handler return back. + */ + if (!p->pre_handler || !p->pre_handler(p, regs)) { + setup_singlestep(p, regs, kcb, 0); + return; + } + } + } else if ((le32_to_cpu(*(kprobe_opcode_t *) addr) == + BRK64_OPCODE_KPROBES) && cur_kprobe) { + /* We probably hit a jprobe. Call its break handler. */ + if (cur_kprobe->break_handler && + cur_kprobe->break_handler(cur_kprobe, regs)) { + setup_singlestep(cur_kprobe, regs, kcb, 0); + return; + } + } + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + * Return back to original instruction, and continue. 
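+ * No PC fixup is needed for that: the BRK exception leaves the PC at
+ * the breakpoint address itself, so returning simply re-executes
+ * whatever instruction the other CPU wrote back.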
+ */ +} + +static int __kprobes +kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) +{ + if ((kcb->ss_ctx.ss_pending) + && (kcb->ss_ctx.match_addr == addr)) { + clear_ss_context(kcb); /* clear pending ss */ + return DBG_HOOK_HANDLED; + } + /* not ours, kprobes should ignore it */ + return DBG_HOOK_ERROR; +} + +int __kprobes +kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + int retval; + + /* return error if this is not our step */ + retval = kprobe_ss_hit(kcb, instruction_pointer(regs)); + + if (retval == DBG_HOOK_HANDLED) { + kprobes_restore_local_irqflag(kcb, regs); + kernel_disable_single_step(); + + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 1); + + post_kprobe_handler(kcb, regs); + } + + return retval; +} + +int __kprobes +kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) +{ + kprobe_handler(regs); + return DBG_HOOK_HANDLED; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + long stack_ptr = kernel_stack_pointer(regs); + + kcb->jprobe_saved_regs = *regs; + /* + * As Linus pointed out, gcc assumes that the callee + * owns the argument space and could overwrite it, e.g. + * tailcall optimization. So, to be absolutely safe + * we also save and restore enough stack bytes to cover + * the argument area. + */ + kasan_disable_current(); + memcpy(kcb->jprobes_stack, (void *)stack_ptr, + min_stack_size(stack_ptr)); + kasan_enable_current(); + + instruction_pointer_set(regs, (unsigned long) jp->entry); + preempt_disable(); + pause_graph_tracing(); + return 1; +} + +void __kprobes jprobe_return(void) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + /* + * Jprobe handler return by entering break exception, + * encoded same as kprobe, but with following conditions + * -a special PC to identify it from the other kprobes. 
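+ *  (that PC is the jprobe_return_break label in the asm below,
+ *   which longjmp_break_handler() keys on)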
+ * -restore stack addr to original saved pt_regs + */ + asm volatile(" mov sp, %0 \n" + "jprobe_return_break: brk %1 \n" + : + : "r" (kcb->jprobe_saved_regs.sp), + "I" (BRK64_ESR_KPROBES) + : "memory"); + + unreachable(); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + long stack_addr = kcb->jprobe_saved_regs.sp; + long orig_sp = kernel_stack_pointer(regs); + struct jprobe *jp = container_of(p, struct jprobe, kp); + extern const char jprobe_return_break[]; + + if (instruction_pointer(regs) != (u64) jprobe_return_break) + return 0; + + if (orig_sp != stack_addr) { + struct pt_regs *saved_regs = + (struct pt_regs *)kcb->jprobe_saved_regs.sp; + pr_err("current sp %lx does not match saved sp %lx\n", + orig_sp, stack_addr); + pr_err("Saved registers for jprobe %p\n", jp); + show_regs(saved_regs); + pr_err("Current registers\n"); + show_regs(regs); + BUG(); + } + unpause_graph_tracing(); + *regs = kcb->jprobe_saved_regs; + kasan_disable_current(); + memcpy((void *)stack_addr, kcb->jprobes_stack, + min_stack_size(stack_addr)); + kasan_enable_current(); + preempt_enable_no_resched(); + return 1; +} + +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + extern char __idmap_text_start[], __idmap_text_end[]; + extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; + + if ((addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)__entry_text_start && + addr < (unsigned long)__entry_text_end) || + (addr >= (unsigned long)__idmap_text_start && + addr < (unsigned long)__idmap_text_end) || + !!search_exception_tables(addr)) + return true; + + if (!is_kernel_in_hyp_mode()) { + if ((addr >= (unsigned long)__hyp_text_start && + addr < (unsigned long)__hyp_text_end) || + (addr >= (unsigned long)__hyp_idmap_text_start && + addr < (unsigned long)__hyp_idmap_text_end)) + return true; + } + + return false; +} + +void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = + (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because multiple functions in the call path have + * return probes installed on them, and/or more than one + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always pushed into the head of the list + * - when multiple return probes are registered for the same + * function, the (chronologically) first instance's ret_addr + * will be the real return address, and all the rest will + * point to kretprobe_trampoline. + */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. 
Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + if (ri->rp && ri->rp->handler) { + __this_cpu_write(current_kprobe, &ri->rp->kp); + get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; + ri->rp->handler(ri, regs); + __this_cpu_write(current_kprobe, NULL); + } + + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_hash_unlock(current, &flags); + + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + return (void *)orig_ret_address; +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)regs->regs[30]; + + /* replace return addr (x30) with trampoline */ + regs->regs[30] = (long)&kretprobe_trampoline; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + return 0; +} + +int __init arch_init_kprobes(void) +{ + return 0; +} diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S new file mode 100644 index 000000000000..5d6e7f14638c --- /dev/null +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -0,0 +1,81 @@ +/* + * trampoline entry and return code for kretprobes. + */ + +#include +#include +#include + + .text + + .macro save_all_base_regs + stp x0, x1, [sp, #S_X0] + stp x2, x3, [sp, #S_X2] + stp x4, x5, [sp, #S_X4] + stp x6, x7, [sp, #S_X6] + stp x8, x9, [sp, #S_X8] + stp x10, x11, [sp, #S_X10] + stp x12, x13, [sp, #S_X12] + stp x14, x15, [sp, #S_X14] + stp x16, x17, [sp, #S_X16] + stp x18, x19, [sp, #S_X18] + stp x20, x21, [sp, #S_X20] + stp x22, x23, [sp, #S_X22] + stp x24, x25, [sp, #S_X24] + stp x26, x27, [sp, #S_X26] + stp x28, x29, [sp, #S_X28] + add x0, sp, #S_FRAME_SIZE + stp lr, x0, [sp, #S_LR] + /* + * Construct a useful saved PSTATE + */ + mrs x0, nzcv + mrs x1, daif + orr x0, x0, x1 + mrs x1, CurrentEL + orr x0, x0, x1 + mrs x1, SPSel + orr x0, x0, x1 + stp xzr, x0, [sp, #S_PC] + .endm + + .macro restore_all_base_regs + ldr x0, [sp, #S_PSTATE] + and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT) + msr nzcv, x0 + ldp x0, x1, [sp, #S_X0] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + ldp x8, x9, [sp, #S_X8] + ldp x10, x11, [sp, #S_X10] + ldp x12, x13, [sp, #S_X12] + ldp x14, x15, [sp, #S_X14] + ldp x16, x17, [sp, #S_X16] + ldp x18, x19, [sp, #S_X18] + ldp x20, x21, [sp, #S_X20] + ldp x22, x23, [sp, #S_X22] + ldp x24, x25, [sp, #S_X24] + ldp x26, x27, [sp, #S_X26] + ldp x28, x29, [sp, #S_X28] + .endm + +ENTRY(kretprobe_trampoline) + sub sp, sp, #S_FRAME_SIZE + + save_all_base_regs + + mov x0, sp + bl trampoline_probe_handler + /* + * Replace trampoline address in lr with actual orig_ret_addr return + * address. 
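+ * trampoline_probe_handler() returns it in x0, and the final ret
+ * branches through x30.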
+ */ + mov lr, x0 + + restore_all_base_regs + + add sp, sp, #S_FRAME_SIZE + ret + +ENDPROC(kretprobe_trampoline) diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c new file mode 100644 index 000000000000..8977ce9d009d --- /dev/null +++ b/arch/arm64/kernel/probes/simulate-insn.c @@ -0,0 +1,217 @@ +/* + * arch/arm64/kernel/probes/simulate-insn.c + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include + +#include "simulate-insn.h" + +#define sign_extend(x, signbit) \ + ((x) | (0 - ((x) & (1 << (signbit))))) + +#define bbl_displacement(insn) \ + sign_extend(((insn) & 0x3ffffff) << 2, 27) + +#define bcond_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +#define cbz_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +#define tbz_displacement(insn) \ + sign_extend(((insn >> 5) & 0x3fff) << 2, 15) + +#define ldr_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val) +{ + if (reg < 31) + regs->regs[reg] = val; +} + +static inline void set_w_reg(struct pt_regs *regs, int reg, u64 val) +{ + if (reg < 31) + regs->regs[reg] = lower_32_bits(val); +} + +static inline u64 get_x_reg(struct pt_regs *regs, int reg) +{ + if (reg < 31) + return regs->regs[reg]; + else + return 0; +} + +static inline u32 get_w_reg(struct pt_regs *regs, int reg) +{ + if (reg < 31) + return lower_32_bits(regs->regs[reg]); + else + return 0; +} + +static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + + return (opcode & (1 << 31)) ? + (get_x_reg(regs, xn) == 0) : (get_w_reg(regs, xn) == 0); +} + +static bool __kprobes check_cbnz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + + return (opcode & (1 << 31)) ? 
+ (get_x_reg(regs, xn) != 0) : (get_w_reg(regs, xn) != 0); +} + +static bool __kprobes check_tbz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f); + + return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) == 0; +} + +static bool __kprobes check_tbnz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f); + + return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) != 0; +} + +/* + * instruction simulation functions + */ +void __kprobes +simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs) +{ + long imm, xn, val; + + xn = opcode & 0x1f; + imm = ((opcode >> 3) & 0x1ffffc) | ((opcode >> 29) & 0x3); + imm = sign_extend(imm, 20); + if (opcode & 0x80000000) + val = (imm<<12) + (addr & 0xfffffffffffff000); + else + val = imm + addr; + + set_x_reg(regs, xn, val); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} + +void __kprobes +simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = bbl_displacement(opcode); + + /* Link register is x30 */ + if (opcode & (1 << 31)) + set_x_reg(regs, 30, addr + 4); + + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (aarch32_opcode_cond_checks[opcode & 0xf](regs->pstate & 0xffffffff)) + disp = bcond_displacement(opcode); + + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs) +{ + int xn = (opcode >> 5) & 0x1f; + + /* update pc first in case we're doing a "blr lr" */ + instruction_pointer_set(regs, get_x_reg(regs, xn)); + + /* Link register is x30 */ + if (((opcode >> 21) & 0x3) == 1) + set_x_reg(regs, 30, addr + 4); +} + +void __kprobes +simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (opcode & (1 << 24)) { + if (check_cbnz(opcode, regs)) + disp = cbz_displacement(opcode); + } else { + if (check_cbz(opcode, regs)) + disp = cbz_displacement(opcode); + } + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (opcode & (1 << 24)) { + if (check_tbnz(opcode, regs)) + disp = tbz_displacement(opcode); + } else { + if (check_tbz(opcode, regs)) + disp = tbz_displacement(opcode); + } + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs) +{ + u64 *load_addr; + int xn = opcode & 0x1f; + int disp; + + disp = ldr_displacement(opcode); + load_addr = (u64 *) (addr + disp); + + if (opcode & (1 << 30)) /* x0-x30 */ + set_x_reg(regs, xn, *load_addr); + else /* w0-w30 */ + set_w_reg(regs, xn, *load_addr); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} + +void __kprobes +simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs) +{ + s32 *load_addr; + int xn = opcode & 0x1f; + int disp; + + disp = ldr_displacement(opcode); + load_addr = (s32 *) (addr + disp); + + set_x_reg(regs, xn, *load_addr); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h new file mode 100644 index 000000000000..050bde683c2d --- /dev/null +++ b/arch/arm64/kernel/probes/simulate-insn.h @@ -0,0 +1,28 @@ +/* + * arch/arm64/kernel/probes/simulate-insn.h + * + * Copyright (C) 
2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_SIMULATE_INSN_H +#define _ARM_KERNEL_KPROBES_SIMULATE_INSN_H + +void simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs); +void simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs); +void simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs); +void simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs); +void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs); +void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs); +void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs); +void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs); + +#endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */ diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 3f6cd5c5234f..030c1d5aa46d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -48,6 +48,107 @@ #define CREATE_TRACE_POINTS #include +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} +#define GPR_OFFSET_NAME(r) \ + {.name = "x" #r, .offset = offsetof(struct pt_regs, regs[r])} + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + {.name = "lr", .offset = offsetof(struct pt_regs, regs[30])}, + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(pc), + REG_OFFSET_NAME(pstate), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. 
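+ * Both the task stack and this CPU's IRQ stack qualify as "the kernel
+ * stack" here, matching the two cases checked in the body.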
+ */
+static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+	return ((addr & ~(THREAD_SIZE - 1))  ==
+		(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) ||
+		on_irq_stack(addr, raw_smp_processor_id());
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:	pt_regs which contains kernel stack pointer.
+ * @n:		stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns the @n-th entry of the kernel stack
+ * specified by @regs. If the @n-th entry is NOT on the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+	unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+
+	addr += n;
+	if (regs_within_kernel_stack(regs, (unsigned long)addr))
+		return *addr;
+	else
+		return 0;
+}
+
 /*
  * TODO: does not yet catch signals sent when the child dies.
  * in exit.c or in signal.c.
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
new file mode 100644
index 000000000000..51b73cdde287
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -0,0 +1,130 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+#include <asm/page.h>
+#include <asm/sysreg.h>
+
+/*
+ * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when copying the
+ * new image to its final location. To ensure that the
+ * arm64_relocate_new_kernel routine which does that copy is not overwritten,
+ * all code and data needed by arm64_relocate_new_kernel must be between the
+ * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The
+ * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
+ * control_code_page, a special page which has been set up to be preserved
+ * during the copy operation.
+ */
+ENTRY(arm64_relocate_new_kernel)
+
+	/* Set up the list loop variables. */
+	mov	x17, x1				/* x17 = kimage_start */
+	mov	x16, x0				/* x16 = kimage_head */
+	dcache_line_size x15, x0		/* x15 = dcache line size */
+	mov	x14, xzr			/* x14 = entry ptr */
+	mov	x13, xzr			/* x13 = copy dest */
+
+	/* Clear the sctlr_el2 flags. */
+	mrs	x0, CurrentEL
+	cmp	x0, #CurrentEL_EL2
+	b.ne	1f
+	mrs	x0, sctlr_el2
+	ldr	x1, =SCTLR_ELx_FLAGS
+	bic	x0, x0, x1
+	msr	sctlr_el2, x0
+	isb
+1:
+
+	/* Check if the new image needs relocation. */
+	tbnz	x16, IND_DONE_BIT, .Ldone
+
+.Lloop:
+	and	x12, x16, PAGE_MASK		/* x12 = addr */
+
+	/* Test the entry flags. */
+.Ltest_source:
+	tbz	x16, IND_SOURCE_BIT, .Ltest_indirection
+
+	/* Invalidate dest page to PoC.
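+	 * This is done line by line with "dc ivac" (line size in x15), so
+	 * no stale cache line can shadow the freshly copied data once the
+	 * MMU and caches are turned off.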
*/ + mov x0, x13 + add x20, x0, #PAGE_SIZE + sub x1, x15, #1 + bic x0, x0, x1 +2: dc ivac, x0 + add x0, x0, x15 + cmp x0, x20 + b.lo 2b + dsb sy + + mov x20, x13 + mov x21, x12 + copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7 + + /* dest += PAGE_SIZE */ + add x13, x13, PAGE_SIZE + b .Lnext + +.Ltest_indirection: + tbz x16, IND_INDIRECTION_BIT, .Ltest_destination + + /* ptr = addr */ + mov x14, x12 + b .Lnext + +.Ltest_destination: + tbz x16, IND_DESTINATION_BIT, .Lnext + + /* dest = addr */ + mov x13, x12 + +.Lnext: + /* entry = *ptr++ */ + ldr x16, [x14], #8 + + /* while (!(entry & DONE)) */ + tbz x16, IND_DONE_BIT, .Lloop + +.Ldone: + /* wait for writes from copy_page to finish */ + dsb nsh + ic iallu + dsb nsh + isb + + /* Start new image. */ + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + br x17 + +ENDPROC(arm64_relocate_new_kernel) + +.ltorg + +.align 3 /* To keep the 64-bit values below naturally aligned. */ + +.Lcopy_end: +.org KEXEC_CONTROL_PAGE_SIZE + +/* + * arm64_relocate_new_kernel_size - Number of bytes to copy to the + * control_code_page. + */ +.globl arm64_relocate_new_kernel_size +arm64_relocate_new_kernel_size: + .quad .Lcopy_end - arm64_relocate_new_kernel diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3279defabaa2..2981f1bdd073 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -202,7 +202,7 @@ static void __init request_standard_resources(void) struct resource *res; kernel_code.start = virt_to_phys(_text); - kernel_code.end = virt_to_phys(_etext - 1); + kernel_code.end = virt_to_phys(__init_begin - 1); kernel_data.start = virt_to_phys(_sdata); kernel_data.end = virt_to_phys(_end - 1); @@ -257,14 +257,17 @@ void __init setup_arch(char **cmdline_p) */ cpu_uninstall_idmap(); + xen_early_init(); efi_init(); arm64_memblock_init(); + paging_init(); + + acpi_table_upgrade(); + /* Parse the ACPI tables for possible boot-time configuration */ acpi_boot_table_init(); - paging_init(); - if (acpi_disabled) unflatten_device_tree(); @@ -281,8 +284,6 @@ void __init setup_arch(char **cmdline_p) else psci_acpi_init(); - xen_early_init(); - cpu_read_bootcpu_ops(); smp_init_cpus(); smp_build_mpidr_hash(); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 678e0842cb3b..76a6d9263908 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -267,7 +267,6 @@ asmlinkage void secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); - local_dbg_enable(); local_irq_enable(); local_async_enable(); @@ -437,9 +436,9 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); cpuinfo_store_boot_cpu(); save_boot_cpu_run_el(); - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) @@ -560,6 +559,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) */ acpi_set_mailbox_entry(cpu_count, processor); + early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count, hwid)); + cpu_count++; } @@ -693,6 +694,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) smp_store_cpu_info(smp_processor_id()); + /* + * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set + * secondary CPUs present. 
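+	 * Without the present bit those CPUs can never be brought online
+	 * through sysfs later, which is what "nosmp" demands.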
+ */ + if (max_cpus == 0) + return; + /* * Initialise the present map (which describes the set of CPUs * actually populated at the present time) and release the @@ -909,3 +917,21 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +static bool have_cpu_die(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int any_cpu = raw_smp_processor_id(); + + if (cpu_ops[any_cpu]->cpu_die) + return true; +#endif + return false; +} + +bool cpus_are_stuck_in_kernel(void) +{ + bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die()); + + return !!cpus_stuck_in_kernel || smp_spin_tables; +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 2a43012616b7..e04f83873af7 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -41,6 +41,7 @@ #include #include #include +#include static const char *handler[]= { "Synchronous Abort", @@ -52,15 +53,14 @@ static const char *handler[]= { int show_unhandled_signals = 1; /* - * Dump out the contents of some memory nicely... + * Dump out the contents of some kernel memory nicely... */ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, - unsigned long top, bool compat) + unsigned long top) { unsigned long first; mm_segment_t fs; int i; - unsigned int width = compat ? 4 : 8; /* * We need to switch to kernel mode so that we can use __get_user @@ -78,22 +78,15 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, memset(str, ' ', sizeof(str)); str[sizeof(str) - 1] = '\0'; - for (p = first, i = 0; i < (32 / width) - && p < top; i++, p += width) { + for (p = first, i = 0; i < (32 / 8) + && p < top; i++, p += 8) { if (p >= bottom && p < top) { unsigned long val; - if (width == 8) { - if (__get_user(val, (unsigned long *)p) == 0) - sprintf(str + i * 17, " %016lx", val); - else - sprintf(str + i * 17, " ????????????????"); - } else { - if (__get_user(val, (unsigned int *)p) == 0) - sprintf(str + i * 9, " %08lx", val); - else - sprintf(str + i * 9, " ????????"); - } + if (__get_user(val, (unsigned long *)p) == 0) + sprintf(str + i * 17, " %016lx", val); + else + sprintf(str + i * 17, " ????????????????"); } } printk("%s%04lx:%s\n", lvl, first & 0xffff, str); @@ -216,7 +209,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); dump_mem("", "Exception stack", stack, - stack + sizeof(struct pt_regs), false); + stack + sizeof(struct pt_regs)); } } } @@ -254,10 +247,9 @@ static int __die(const char *str, int err, struct thread_info *thread, pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); - if (!user_mode(regs) || in_interrupt()) { + if (!user_mode(regs)) { dump_mem(KERN_EMERG, "Stack: ", regs->sp, - THREAD_SIZE + (unsigned long)task_stack_page(tsk), - compat_user_mode(regs)); + THREAD_SIZE + (unsigned long)task_stack_page(tsk)); dump_backtrace(regs, tsk); dump_instr(KERN_EMERG, regs); } @@ -373,11 +365,59 @@ exit: return fn ? 
fn(regs, instr) : 1; } -asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +static void force_signal_inject(int signal, int code, struct pt_regs *regs, + unsigned long address) { siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); + const char *desc; + switch (signal) { + case SIGILL: + desc = "undefined instruction"; + break; + case SIGSEGV: + desc = "illegal memory access"; + break; + default: + desc = "bad mode"; + break; + } + + if (unhandled_signal(current, signal) && + show_unhandled_signals_ratelimited()) { + pr_info("%s[%d]: %s: pc=%p\n", + current->comm, task_pid_nr(current), desc, pc); + dump_instr(KERN_INFO, regs); + } + + info.si_signo = signal; + info.si_errno = 0; + info.si_code = code; + info.si_addr = pc; + + arm64_notify_die(desc, regs, &info, 0); +} + +/* + * Set up process info to signal segmentation fault - called on access error. + */ +void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr) +{ + int code; + + down_read(¤t->mm->mmap_sem); + if (find_vma(current->mm, addr) == NULL) + code = SEGV_MAPERR; + else + code = SEGV_ACCERR; + up_read(¤t->mm->mmap_sem); + + force_signal_inject(SIGSEGV, code, regs, addr); +} + +asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +{ /* check for AArch32 breakpoint instructions */ if (!aarch32_break_handler(regs)) return; @@ -385,18 +425,66 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; - if (unhandled_signal(current, SIGILL) && show_unhandled_signals_ratelimited()) { - pr_info("%s[%d]: undefined instruction: pc=%p\n", - current->comm, task_pid_nr(current), pc); - dump_instr(KERN_INFO, regs); - } + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); +} - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = pc; +void cpu_enable_cache_maint_trap(void *__unused) +{ + config_sctlr_el1(SCTLR_EL1_UCI, 0); +} + +#define __user_cache_maint(insn, address, res) \ + asm volatile ( \ + "1: " insn ", %1\n" \ + " mov %w0, #0\n" \ + "2:\n" \ + " .pushsection .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %w0, %w2\n" \ + " b 2b\n" \ + " .popsection\n" \ + _ASM_EXTABLE(1b, 3b) \ + : "=r" (res) \ + : "r" (address), "i" (-EFAULT) ) + +asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +{ + unsigned long address; + int ret; - arm64_notify_die("Oops - undefined instruction", regs, &info, 0); + /* if this is a write with: Op0=1, Op2=1, Op1=3, CRn=7 */ + if ((esr & 0x01fffc01) == 0x0012dc00) { + int rt = (esr >> 5) & 0x1f; + int crm = (esr >> 1) & 0x0f; + + address = (rt == 31) ? 
0 : regs->regs[rt]; + + switch (crm) { + case 11: /* DC CVAU, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case 10: /* DC CVAC, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case 14: /* DC CIVAC */ + __user_cache_maint("dc civac", address, ret); + break; + case 5: /* IC IVAU */ + __user_cache_maint("ic ivau", address, ret); + break; + default: + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + return; + } + } else { + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + return; + } + + if (ret) + arm64_notify_segfault(regs, address); + else + regs->pc += 4; } long compat_arm_syscall(struct pt_regs *regs); @@ -465,7 +553,7 @@ static const char *esr_class_str[] = { const char *esr_get_class_string(u32 esr) { - return esr_class_str[esr >> ESR_ELx_EC_SHIFT]; + return esr_class_str[ESR_ELx_EC(esr)]; } /* diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 9fefb005812a..076312b17d4f 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -214,10 +214,16 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { + /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; + vdso_data->raw_time_sec = tk->raw_time.tv_sec; + vdso_data->raw_time_nsec = tk->raw_time.tv_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - vdso_data->cs_mult = tk->tkr_mono.mult; + /* tkr_raw.xtime_nsec == 0 */ + vdso_data->cs_mono_mult = tk->tkr_mono.mult; + vdso_data->cs_raw_mult = tk->tkr_raw.mult; + /* tkr_mono.shift == tkr_raw.shift */ vdso_data->cs_shift = tk->tkr_mono.shift; } diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index b467fd0a384b..62c84f7cb01b 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -23,7 +23,7 @@ GCOV_PROFILE := n ccflags-y += -Wl,-shared obj-y += vdso.o -extra-y += vdso.lds vdso-offsets.h +extra-y += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Force dependency (incbin is bad) @@ -42,11 +42,10 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ define cmd_vdsosym - $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \ - cp $@ include/generated/ + $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ endef -$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index efa79e8d4196..e00b4671bd7c 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -26,24 +26,109 @@ #define NSEC_PER_SEC_HI16 0x3b9a vdso_data .req x6 -use_syscall .req w7 -seqcnt .req w8 +seqcnt .req w7 +w_tmp .req w8 +x_tmp .req x8 + +/* + * Conventions for macro arguments: + * - An argument is write-only if its name starts with "res". + * - All other arguments are read-only, unless otherwise specified. 
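+ * - Most macros clobber the shared scratch registers w_tmp/x_tmp (w8/x8),
+ *   so callers must not keep live values there.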
+ */ .macro seqcnt_acquire 9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] tbnz seqcnt, #0, 9999b dmb ishld - ldr use_syscall, [vdso_data, #VDSO_USE_SYSCALL] .endm - .macro seqcnt_read, cnt + .macro seqcnt_check fail dmb ishld - ldr \cnt, [vdso_data, #VDSO_TB_SEQ_COUNT] + ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT] + cmp w_tmp, seqcnt + b.ne \fail .endm - .macro seqcnt_check, cnt, fail - cmp \cnt, seqcnt - b.ne \fail + .macro syscall_check fail + ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL] + cbnz w_tmp, \fail + .endm + + .macro get_nsec_per_sec res + mov \res, #NSEC_PER_SEC_LO16 + movk \res, #NSEC_PER_SEC_HI16, lsl #16 + .endm + + /* + * Returns the clock delta, in nanoseconds left-shifted by the clock + * shift. + */ + .macro get_clock_shifted_nsec res, cycle_last, mult + /* Read the virtual counter. */ + isb + mrs x_tmp, cntvct_el0 + /* Calculate cycle delta and convert to ns. */ + sub \res, x_tmp, \cycle_last + /* We can only guarantee 56 bits of precision. */ + movn x_tmp, #0xff00, lsl #48 + and \res, x_tmp, \res + mul \res, \res, \mult + .endm + + /* + * Returns in res_{sec,nsec} the REALTIME timespec, based on the + * "wall time" (xtime) and the clock_mono delta. + */ + .macro get_ts_realtime res_sec, res_nsec, \ + clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec + add \res_nsec, \clock_nsec, \xtime_nsec + udiv x_tmp, \res_nsec, \nsec_to_sec + add \res_sec, \xtime_sec, x_tmp + msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec + .endm + + /* + * Returns in res_{sec,nsec} the timespec based on the clock_raw delta, + * used for CLOCK_MONOTONIC_RAW. + */ + .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec + udiv \res_sec, \clock_nsec, \nsec_to_sec + msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec + .endm + + /* sec and nsec are modified in place. */ + .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec + /* Add timespec. */ + add \sec, \sec, \ts_sec + add \nsec, \nsec, \ts_nsec + + /* Normalise the new timespec. */ + cmp \nsec, \nsec_to_sec + b.lt 9999f + sub \nsec, \nsec, \nsec_to_sec + add \sec, \sec, #1 +9999: + cmp \nsec, #0 + b.ge 9998f + add \nsec, \nsec, \nsec_to_sec + sub \sec, \sec, #1 +9998: + .endm + + .macro clock_gettime_return, shift=0 + .if \shift == 1 + lsr x11, x11, x12 + .endif + stp x10, x11, [x1, #TSPEC_TV_SEC] + mov x0, xzr + ret + .endm + + .macro jump_slot jumptable, index, label + .if (. - \jumptable) != 4 * (\index) + .error "Jump slot index mismatch" + .endif + b \label .endm .text @@ -51,18 +136,25 @@ seqcnt .req w8 /* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ ENTRY(__kernel_gettimeofday) .cfi_startproc - mov x2, x30 - .cfi_register x30, x2 - - /* Acquire the sequence counter and get the timespec. */ adr vdso_data, _vdso_data -1: seqcnt_acquire - cbnz use_syscall, 4f - /* If tv is NULL, skip to the timezone code. */ cbz x0, 2f - bl __do_get_tspec - seqcnt_check w9, 1b + + /* Compute the time of day. */ +1: seqcnt_acquire + syscall_check fail=4f + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + seqcnt_check fail=1b + + get_nsec_per_sec res=x9 + lsl x9, x9, x12 + + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 /* Convert ns to us. 
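 * (struct timeval carries microseconds, so the nanosecond result is
	 * divided by 1000 below)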
*/ mov x13, #1000 @@ -76,95 +168,126 @@ ENTRY(__kernel_gettimeofday) stp w4, w5, [x1, #TZ_MINWEST] 3: mov x0, xzr - ret x2 + ret 4: /* Syscall fallback. */ mov x8, #__NR_gettimeofday svc #0 - ret x2 + ret .cfi_endproc ENDPROC(__kernel_gettimeofday) +#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE + /* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ ENTRY(__kernel_clock_gettime) .cfi_startproc - cmp w0, #CLOCK_REALTIME - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne - b.ne 2f + cmp w0, #JUMPSLOT_MAX + b.hi syscall + adr vdso_data, _vdso_data + adr x_tmp, jumptable + add x_tmp, x_tmp, w0, uxtw #2 + br x_tmp + + ALIGN +jumptable: + jump_slot jumptable, CLOCK_REALTIME, realtime + jump_slot jumptable, CLOCK_MONOTONIC, monotonic + b syscall + b syscall + jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw + jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse + jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse + + .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1) + .error "Wrong jumptable size" + .endif + + ALIGN +realtime: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + seqcnt_check fail=realtime - mov x2, x30 - .cfi_register x30, x2 + /* All computations are done with left-shifted nsecs. */ + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - /* Get kernel timespec. */ - adr vdso_data, _vdso_data -1: seqcnt_acquire - cbnz use_syscall, 7f + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 + clock_gettime_return, shift=1 - bl __do_get_tspec - seqcnt_check w9, 1b + ALIGN +monotonic: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] + seqcnt_check fail=monotonic - mov x30, x2 + /* All computations are done with left-shifted nsecs. */ + lsl x4, x4, x12 + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - cmp w0, #CLOCK_MONOTONIC - b.ne 6f + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - /* Get wtm timespec. */ - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 + clock_gettime_return, shift=1 - /* Check the sequence counter. */ - seqcnt_read w9 - seqcnt_check w9, 1b - b 4f -2: - cmp w0, #CLOCK_REALTIME_COARSE - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne - b.ne 8f + ALIGN +monotonic_raw: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_raw_mult, w12 = cs_shift */ + ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] + ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] + seqcnt_check fail=monotonic_raw - /* xtime_coarse_nsec is already right-shifted */ - mov x12, #0 + /* All computations are done with left-shifted nsecs. */ + lsl x14, x14, x12 + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - /* Get coarse timespec. 
*/ - adr vdso_data, _vdso_data -3: seqcnt_acquire + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_clock_raw res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, nsec_to_sec=x9 + + add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 + clock_gettime_return, shift=1 + + ALIGN +realtime_coarse: + seqcnt_acquire ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] + seqcnt_check fail=realtime_coarse + clock_gettime_return - /* Get wtm timespec. */ + ALIGN +monotonic_coarse: + seqcnt_acquire + ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + seqcnt_check fail=monotonic_coarse - /* Check the sequence counter. */ - seqcnt_read w9 - seqcnt_check w9, 3b + /* Computations are done in (non-shifted) nsecs. */ + get_nsec_per_sec res=x9 + add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 + clock_gettime_return - cmp w0, #CLOCK_MONOTONIC_COARSE - b.ne 6f -4: - /* Add on wtm timespec. */ - add x10, x10, x13 - lsl x14, x14, x12 - add x11, x11, x14 - - /* Normalise the new timespec. */ - mov x15, #NSEC_PER_SEC_LO16 - movk x15, #NSEC_PER_SEC_HI16, lsl #16 - lsl x15, x15, x12 - cmp x11, x15 - b.lt 5f - sub x11, x11, x15 - add x10, x10, #1 -5: - cmp x11, #0 - b.ge 6f - add x11, x11, x15 - sub x10, x10, #1 - -6: /* Store to the user timespec. */ - lsr x11, x11, x12 - stp x10, x11, [x1, #TSPEC_TV_SEC] - mov x0, xzr - ret -7: - mov x30, x2 -8: /* Syscall fallback. */ + ALIGN +syscall: /* Syscall fallback. */ mov x8, #__NR_clock_gettime svc #0 ret @@ -176,6 +299,7 @@ ENTRY(__kernel_clock_getres) .cfi_startproc cmp w0, #CLOCK_REALTIME ccmp w0, #CLOCK_MONOTONIC, #0x4, ne + ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne b.ne 1f ldr x2, 5f @@ -203,46 +327,3 @@ ENTRY(__kernel_clock_getres) .quad CLOCK_COARSE_RES .cfi_endproc ENDPROC(__kernel_clock_getres) - -/* - * Read the current time from the architected counter. - * Expects vdso_data to be initialised. - * Clobbers the temporary registers (x9 - x15). - * Returns: - * - w9 = vDSO sequence counter - * - (x10, x11) = (ts->tv_sec, shifted ts->tv_nsec) - * - w12 = cs_shift - */ -ENTRY(__do_get_tspec) - .cfi_startproc - - /* Read from the vDSO data page. */ - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - ldp w11, w12, [vdso_data, #VDSO_CS_MULT] - seqcnt_read w9 - - /* Read the virtual counter. */ - isb - mrs x15, cntvct_el0 - - /* Calculate cycle delta and convert to ns. */ - sub x10, x15, x10 - /* We can only guarantee 56 bits of precision. */ - movn x15, #0xff00, lsl #48 - and x10, x15, x10 - mul x10, x10, x11 - - /* Use the kernel time to calculate the new timespec. */ - mov x11, #NSEC_PER_SEC_LO16 - movk x11, #NSEC_PER_SEC_HI16, lsl #16 - lsl x11, x11, x12 - add x15, x10, x14 - udiv x14, x15, x11 - add x10, x13, x14 - mul x13, x14, x11 - sub x11, x15, x13 - - ret - .cfi_endproc -ENDPROC(__do_get_tspec) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 435e820e898d..89d6e177ecbd 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -118,9 +118,11 @@ SECTIONS __exception_text_end = .; IRQENTRY_TEXT SOFTIRQENTRY_TEXT + ENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT + KPROBES_TEXT HYPERVISOR_TEXT IDMAP_TEXT HIBERNATE_TEXT @@ -131,12 +133,13 @@ SECTIONS } . 
= ALIGN(SEGMENT_ALIGN); - RO_DATA(PAGE_SIZE) /* everything from this point to */ - EXCEPTION_TABLE(8) /* _etext will be marked RO NX */ + _etext = .; /* End of text section */ + + RO_DATA(PAGE_SIZE) /* everything from this point to */ + EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES . = ALIGN(SEGMENT_ALIGN); - _etext = .; /* End of text and rodata section */ __init_begin = .; INIT_TEXT_SECTION(8) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 3246c4aba5b1..fa96fe2bd469 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -106,7 +106,7 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) run->exit_reason = KVM_EXIT_DEBUG; run->debug.arch.hsr = hsr; - switch (hsr >> ESR_ELx_EC_SHIFT) { + switch (ESR_ELx_EC(hsr)) { case ESR_ELx_EC_WATCHPT_LOW: run->debug.arch.far = vcpu->arch.fault.far_el2; /* fall through */ @@ -149,7 +149,7 @@ static exit_handle_fn arm_exit_handlers[] = { static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { u32 hsr = kvm_vcpu_get_hsr(vcpu); - u8 hsr_ec = hsr >> ESR_ELx_EC_SHIFT; + u8 hsr_ec = ESR_ELx_EC(hsr); if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || !arm_exit_handlers[hsr_ec]) { diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 778d0effa2af..0c85febcc1eb 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -17,6 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o +# KVM code is run at a different exception code with a different map, so +# compiler instrumentation that inserts callbacks or checks into the code may +# cause crashes. Just disable it. GCOV_PROFILE := n KASAN_SANITIZE := n UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 437cfad5e3d8..4373997d1a70 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -198,7 +198,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) { u64 esr = read_sysreg_el2(esr); - u8 ec = esr >> ESR_ELx_EC_SHIFT; + u8 ec = ESR_ELx_EC(esr); u64 hpfar, far; vcpu->arch.fault.esr_el2 = esr; diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 0f7c40eb3f53..934137647837 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -27,8 +27,8 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { } /* * Non-VHE: Both host and guest must save everything. * - * VHE: Host must save tpidr*_el[01], actlr_el1, sp0, pc, pstate, and - * guest must save everything. + * VHE: Host must save tpidr*_el[01], actlr_el1, mdscr_el1, sp0, pc, + * pstate, and guest must save everything. 
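+ * mdscr_el1 moves into the common set below so that it is context
+ * switched for VHE hosts as well.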
*/ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) @@ -37,6 +37,7 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); + ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); ctxt->gp_regs.regs.pc = read_sysreg_el2(elr); ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr); @@ -61,7 +62,6 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair); ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl); ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); - ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr); @@ -90,6 +90,7 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); + write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr); @@ -114,7 +115,6 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair); write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl); write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); - write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); write_sysreg_el1(ctxt->gp_regs.elr_el1, elr); diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 17e8306dca29..0b90497d4424 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -66,7 +66,7 @@ .endm end .req x5 -ENTRY(__copy_from_user) +ENTRY(__arch_copy_from_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 @@ -75,7 +75,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 // Nothing to copy ret -ENDPROC(__copy_from_user) +ENDPROC(__arch_copy_from_user) .section .fixup,"ax" .align 2 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 21faae60f988..7a7efe255034 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -65,7 +65,7 @@ .endm end .req x5 -ENTRY(__copy_to_user) +ENTRY(__arch_copy_to_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 @@ -74,7 +74,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 ret -ENDPROC(__copy_to_user) +ENDPROC(__arch_copy_to_user) .section .fixup,"ax" .align 2 diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 50ff9ba3a236..07d7352d7c38 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -52,7 +52,7 @@ ENTRY(__flush_cache_user_range) sub x3, x2, #1 bic x4, x0, x3 1: -USER(9f, dc cvau, x4 ) // clean D line to PoU +user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE add x4, x4, x2 cmp x4, x1 b.lo 1b diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index b7b397802088..efcf1f7ef1e4 100644 --- a/arch/arm64/mm/context.c +++ 
b/arch/arm64/mm/context.c @@ -179,7 +179,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) &asid_generation); flush_context(cpu); - /* We have at least 1 ASID per CPU, so this will always succeed */ + /* We have more ASIDs than CPUs, so this will always succeed */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); set_asid: @@ -227,8 +227,11 @@ switch_mm_fastpath: static int asids_init(void) { asid_bits = get_cpu_asid_bits(); - /* If we end up with more CPUs than ASIDs, expect things to crash */ - WARN_ON(NUM_USER_ASIDS < num_possible_cpus()); + /* + * Expect allocation after rollover to fail if we don't have at least + * one more ASID than CPUs. ASID #0 is reserved for init_mm. + */ + WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus()); atomic64_set(&asid_generation, ASID_FIRST_VERSION); asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map), GFP_KERNEL); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index c566ec83719f..f6c55afab3e2 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -29,6 +30,8 @@ #include +static int swiotlb __read_mostly; + static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, bool coherent) { @@ -341,6 +344,13 @@ static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, return ret; } +static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) +{ + if (swiotlb) + return swiotlb_dma_supported(hwdev, mask); + return 1; +} + static struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_alloc, .free = __dma_free, @@ -354,7 +364,7 @@ static struct dma_map_ops swiotlb_dma_ops = { .sync_single_for_device = __swiotlb_sync_single_for_device, .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, .sync_sg_for_device = __swiotlb_sync_sg_for_device, - .dma_supported = swiotlb_dma_supported, + .dma_supported = __swiotlb_dma_supported, .mapping_error = swiotlb_dma_mapping_error, }; @@ -513,6 +523,9 @@ EXPORT_SYMBOL(dummy_dma_ops); static int __init arm64_dma_init(void) { + if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + swiotlb = 1; + return atomic_pool_init(); } arch_initcall(arm64_dma_init); @@ -848,15 +861,16 @@ static int __iommu_attach_notifier(struct notifier_block *nb, { struct iommu_dma_notifier_data *master, *tmp; - if (action != BUS_NOTIFY_ADD_DEVICE) + if (action != BUS_NOTIFY_BIND_DRIVER) return 0; mutex_lock(&iommu_dma_notifier_lock); list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) { - if (do_iommu_attach(master->dev, master->ops, - master->dma_base, master->size)) { + if (data == master->dev && do_iommu_attach(master->dev, + master->ops, master->dma_base, master->size)) { list_del(&master->list); kfree(master); + break; } } mutex_unlock(&iommu_dma_notifier_lock); @@ -870,17 +884,8 @@ static int __init register_iommu_dma_ops_notifier(struct bus_type *bus) if (!nb) return -ENOMEM; - /* - * The device must be attached to a domain before the driver probe - * routine gets a chance to start allocating DMA buffers. However, - * the IOMMU driver also needs a chance to configure the iommu_group - * via its add_device callback first, so we need to make the attach - * happen between those two points. Since the IOMMU core uses a bus - * notifier with default priority for add_device, do the same but - * with a lower priority to ensure the appropriate ordering. 
- */ + nb->notifier_call = __iommu_attach_notifier; - nb->priority = -100; ret = bus_register_notifier(bus, nb); if (ret) { @@ -904,10 +909,6 @@ static int __init __iommu_dma_init(void) if (!ret) ret = register_iommu_dma_ops_notifier(&pci_bus_type); #endif - - /* handle devices queued before this arch_initcall */ - if (!ret) - __iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL); return ret; } arch_initcall(__iommu_dma_init); diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index ccfde237d6e6..f94b80eb295d 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -27,11 +27,7 @@ #include #include #include - -struct addr_marker { - unsigned long start_address; - const char *name; -}; +#include static const struct addr_marker address_markers[] = { #ifdef CONFIG_KASAN @@ -290,7 +286,8 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) } } -static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) +static void walk_pgd(struct pg_state *st, struct mm_struct *mm, + unsigned long start) { pgd_t *pgd = pgd_offset(mm, 0UL); unsigned i; @@ -309,12 +306,13 @@ static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long st static int ptdump_show(struct seq_file *m, void *v) { + struct ptdump_info *info = m->private; struct pg_state st = { .seq = m, - .marker = address_markers, + .marker = info->markers, }; - walk_pgd(&st, &init_mm, VA_START); + walk_pgd(&st, info->mm, info->base_addr); note_page(&st, 0, 0, 0); return 0; @@ -322,7 +320,7 @@ static int ptdump_show(struct seq_file *m, void *v) static int ptdump_open(struct inode *inode, struct file *file) { - return single_open(file, ptdump_show, NULL); + return single_open(file, ptdump_show, inode->i_private); } static const struct file_operations ptdump_fops = { @@ -332,7 +330,7 @@ static const struct file_operations ptdump_fops = { .release = single_release, }; -static int ptdump_init(void) +int ptdump_register(struct ptdump_info *info, const char *name) { struct dentry *pe; unsigned i, j; @@ -342,8 +340,18 @@ static int ptdump_init(void) for (j = 0; j < pg_level[i].num; j++) pg_level[i].mask |= pg_level[i].bits[j].mask; - pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, - &ptdump_fops); + pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); return pe ? 0 : -ENOMEM; } + +static struct ptdump_info kernel_ptdump_info = { + .mm = &init_mm, + .markers = address_markers, + .base_addr = VA_START, +}; + +static int ptdump_init(void) +{ + return ptdump_register(&kernel_ptdump_info, "kernel_page_tables"); +} device_initcall(ptdump_init); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 013e2cbe7924..c8beaa0da7df 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -41,6 +41,28 @@ static const char *fault_name(unsigned int esr); +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) +{ + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, esr)) + ret = 1; + preempt_enable(); + } + + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) +{ + return 0; +} +#endif + /* * Dump out the page tables associated with 'addr' in mm 'mm'. 
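A hypothetical second user of the ptdump_register() interface introduced in the dump.c hunks above; the names efi_mm and efi_markers are illustrative only, not part of this series.

static struct ptdump_info efi_ptdump_info = {
	.mm	   = &efi_mm,
	.markers   = efi_markers,
	.base_addr = 0,
};

static int __init efi_ptdump_init(void)
{
	return ptdump_register(&efi_ptdump_info, "efi_page_tables");
}
device_initcall(efi_ptdump_init);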
*/ @@ -202,8 +224,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADMAP 0x010000 #define VM_FAULT_BADACCESS 0x020000 -#define ESR_LNX_EXEC (1 << 24) - static int __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int mm_flags, unsigned long vm_flags, struct task_struct *tsk) @@ -233,7 +253,7 @@ good_area: goto out; } - return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags); + return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags); check_stack: if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) @@ -242,14 +262,19 @@ out: return fault; } -static inline int permission_fault(unsigned int esr) +static inline bool is_permission_fault(unsigned int esr) { - unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT; + unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM); } +static bool is_el0_instruction_abort(unsigned int esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; +} + static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -259,6 +284,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (notify_page_fault(regs, esr)) + return 0; + tsk = current; mm = tsk->mm; @@ -272,15 +300,16 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; - if (esr & ESR_LNX_EXEC) { + if (is_el0_instruction_abort(esr)) { vm_flags = VM_EXEC; } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } - if (permission_fault(esr) && (addr < USER_DS)) { - if (get_fs() == KERNEL_DS) + if (is_permission_fault(esr) && (addr < USER_DS)) { + /* regs->orig_addr_limit may be 0 if we entered from EL0 */ + if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); if (!search_exception_tables(regs->pc)) @@ -629,6 +658,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return rv; } +NOKPROBE_SYMBOL(do_debug_exception); #ifdef CONFIG_ARM64_PAN void cpu_enable_pan(void *__unused) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index dbd12ea8ce68..43a76b07eb32 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -71,10 +71,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) { struct page *page = pte_page(pte); - /* no flushing needed for anonymous pages */ - if (!page_mapping(page)) - return; - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) sync_icache_aliases(page_address(page), PAGE_SIZE << compound_order(page)); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d45f8627012c..2ade7a6a10a7 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -160,12 +160,10 @@ static void __init arm64_memory_present(void) static void __init arm64_memory_present(void) { struct memblock_region *reg; - int nid = 0; for_each_memblock(memory, reg) { -#ifdef CONFIG_NUMA - nid = reg->nid; -#endif + int nid = memblock_get_region_node(reg); + memory_present(nid, memblock_region_memory_base_pfn(reg), memblock_region_memory_end_pfn(reg)); } @@ -403,7 +401,8 @@ static void __init free_unused_memmap(void) */ void __init mem_init(void) { - swiotlb_init(1); + if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> 
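For orientation, the ESR decoding that the fault.c hunks above rely on, per the ARMv8 exception-syndrome layout (values shown as a sketch of the header definitions, believed correct):

#define ESR_ELx_EC_SHIFT	26
#define ESR_ELx_EC_MASK		(0x3FUL << ESR_ELx_EC_SHIFT)
#define ESR_ELx_EC(esr)		(((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)

#define ESR_ELx_EC_IABT_LOW	0x20	/* instruction abort from a lower EL (EL0) */
#define ESR_ELx_EC_DABT_CUR	0x25	/* data abort taken at the current EL */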
PAGE_SHIFT)) + swiotlb_init(1); set_max_mapnr(pfn_to_page(max_pfn) - mem_map); @@ -430,9 +429,9 @@ void __init mem_init(void) pr_cont(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n", MLG(VMALLOC_START, VMALLOC_END)); pr_cont(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(_text, __start_rodata)); + MLK_ROUNDUP(_text, _etext)); pr_cont(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(__start_rodata, _etext)); + MLK_ROUNDUP(__start_rodata, __init_begin)); pr_cont(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__init_begin, __init_end)); pr_cont(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n", diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0f85a46c3e18..51a558195bb9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -77,7 +77,6 @@ static phys_addr_t __init early_pgtable_alloc(void) void *ptr; phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - BUG_ON(!phys); /* * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE @@ -97,24 +96,6 @@ static phys_addr_t __init early_pgtable_alloc(void) return phys; } -/* - * remap a PMD into pages - */ -static void split_pmd(pmd_t *pmd, pte_t *pte) -{ - unsigned long pfn = pmd_pfn(*pmd); - int i = 0; - - do { - /* - * Need to have the least restrictive permissions available - * permissions will be fixed up later - */ - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); - pfn++; - } while (pte++, i++, i < PTRS_PER_PTE); -} - static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, @@ -122,15 +103,13 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, { pte_t *pte; - if (pmd_none(*pmd) || pmd_sect(*pmd)) { + BUG_ON(pmd_sect(*pmd)); + if (pmd_none(*pmd)) { phys_addr_t pte_phys; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(); pte = pte_set_fixmap(pte_phys); - if (pmd_sect(*pmd)) - split_pmd(pmd, pte); __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); - flush_tlb_all(); pte_clear_fixmap(); } BUG_ON(pmd_bad(*pmd)); @@ -144,41 +123,10 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, pte_clear_fixmap(); } -static void split_pud(pud_t *old_pud, pmd_t *pmd) -{ - unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; - pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); - int i = 0; - - do { - set_pmd(pmd, __pmd(addr | pgprot_val(prot))); - addr += PMD_SIZE; - } while (pmd++, i++, i < PTRS_PER_PMD); -} - -#ifdef CONFIG_DEBUG_PAGEALLOC -static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) -{ - - /* - * If debug_page_alloc is enabled we must map the linear map - * using pages. However, other mappings created by - * create_mapping_noalloc must use sections in some cases. Allow - * sections to be used in those cases, where no pgtable_alloc - * function is provided. - */ - return !pgtable_alloc || !debug_pagealloc_enabled(); -} -#else -static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) -{ - return true; -} -#endif - static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { pmd_t *pmd; unsigned long next; @@ -186,20 +134,13 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, /* * Check for initial section mappings in the pgd/pud and remove them. 
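Arithmetic behind the swiotlb gating in the mem_init() hunk above, with assumed values: if arm64_dma_phys_limit is 4GiB and PAGE_SHIFT is 12, the threshold is 4GiB >> 12 = 0x100000 page frames, so the bounce buffer is only set up when RAM extends past that limit or swiotlb=force is on the command line.

	if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
		swiotlb_init(1);	/* otherwise the bounce buffer is skipped */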
*/ - if (pud_none(*pud) || pud_sect(*pud)) { + BUG_ON(pud_sect(*pud)); + if (pud_none(*pud)) { phys_addr_t pmd_phys; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(); pmd = pmd_set_fixmap(pmd_phys); - if (pud_sect(*pud)) { - /* - * need to have the 1G of mappings continue to be - * present - */ - split_pud(pud, pmd); - } __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); - flush_tlb_all(); pmd_clear_fixmap(); } BUG_ON(pud_bad(*pud)); @@ -209,7 +150,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, next = pmd_addr_end(addr, end); /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && - block_mappings_allowed(pgtable_alloc)) { + allow_block_mappings) { pmd_t old_pmd =*pmd; pmd_set_huge(pmd, phys, prot); /* @@ -248,7 +189,8 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { pud_t *pud; unsigned long next; @@ -268,8 +210,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys) && - block_mappings_allowed(pgtable_alloc)) { + if (use_1G_block(addr, next, phys) && allow_block_mappings) { pud_t old_pud = *pud; pud_set_huge(pud, phys, prot); @@ -290,7 +231,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, } } else { alloc_init_pmd(pud, addr, next, phys, prot, - pgtable_alloc); + pgtable_alloc, allow_block_mappings); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -298,15 +239,14 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, pud_clear_fixmap(); } -/* - * Create the page directory entries and any necessary page tables for the - * mapping specified by 'md'. - */ -static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) +static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { unsigned long addr, length, end, next; + pgd_t *pgd = pgd_offset_raw(pgdir, virt); /* * If the virtual and physical address don't have the same offset @@ -322,29 +262,23 @@ static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc); + alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc, + allow_block_mappings); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static phys_addr_t late_pgtable_alloc(void) +static phys_addr_t pgd_pgtable_alloc(void) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); - BUG_ON(!ptr); + if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) + BUG(); /* Ensure the zeroed page is visible to the page table walker */ dsb(ishst); return __pa(ptr); } -static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, - phys_addr_t (*alloc)(void)) -{ - init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc); -} - /* * This function can only be used to modify existing table entries, * without allocating new levels of table. 
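The section-mapping test used above, unpacked as a sketch assuming a 4K granule where a PMD maps 2MiB: addr, next and phys must all be 2MiB-aligned, and the caller must permit block mappings.

#define SECTION_SHIFT	21			/* 4K granule: PMD level */
#define SECTION_SIZE	(1UL << SECTION_SHIFT)	/* 2MiB */
#define SECTION_MASK	(~(SECTION_SIZE - 1))

static bool can_use_section(unsigned long addr, unsigned long next,
			    unsigned long phys, bool allow_block_mappings)
{
	/* low 21 bits of all three must be clear */
	return ((addr | next | phys) & ~SECTION_MASK) == 0 &&
	       allow_block_mappings;
}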
Note that this permits the @@ -358,16 +292,17 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - NULL); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, true); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, - pgprot_t prot) + pgprot_t prot, bool allow_block_mappings) { + BUG_ON(mm == &init_mm); + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - late_pgtable_alloc); + pgd_pgtable_alloc, allow_block_mappings); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, @@ -380,51 +315,54 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, } __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - late_pgtable_alloc); + NULL, !debug_pagealloc_enabled()); } static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { unsigned long kernel_start = __pa(_text); - unsigned long kernel_end = __pa(_etext); + unsigned long kernel_end = __pa(__init_begin); /* * Take care not to create a writable alias for the * read-only text and rodata sections of the kernel image. */ - /* No overlap with the kernel text */ + /* No overlap with the kernel text/rodata */ if (end < kernel_start || start >= kernel_end) { __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, PAGE_KERNEL, - early_pgtable_alloc); + early_pgtable_alloc, + !debug_pagealloc_enabled()); return; } /* - * This block overlaps the kernel text mapping. + * This block overlaps the kernel text/rodata mappings. * Map the portion(s) which don't overlap. */ if (start < kernel_start) __create_pgd_mapping(pgd, start, __phys_to_virt(start), kernel_start - start, PAGE_KERNEL, - early_pgtable_alloc); + early_pgtable_alloc, + !debug_pagealloc_enabled()); if (kernel_end < end) __create_pgd_mapping(pgd, kernel_end, __phys_to_virt(kernel_end), end - kernel_end, PAGE_KERNEL, - early_pgtable_alloc); + early_pgtable_alloc, + !debug_pagealloc_enabled()); /* - * Map the linear alias of the [_text, _etext) interval as + * Map the linear alias of the [_text, __init_begin) interval as * read-only/non-executable. This makes the contents of the * region accessible to subsystems such as hibernate, but * protects it from inadvertent modification or execution. */ __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start), kernel_end - kernel_start, PAGE_KERNEL_RO, - early_pgtable_alloc); + early_pgtable_alloc, !debug_pagealloc_enabled()); } static void __init map_mem(pgd_t *pgd) @@ -449,14 +387,14 @@ void mark_rodata_ro(void) { unsigned long section_size; - section_size = (unsigned long)__start_rodata - (unsigned long)_text; + section_size = (unsigned long)_etext - (unsigned long)_text; create_mapping_late(__pa(_text), (unsigned long)_text, section_size, PAGE_KERNEL_ROX); /* - * mark .rodata as read only. Use _etext rather than __end_rodata to - * cover NOTES and EXCEPTION_TABLE. + * mark .rodata as read only. Use __init_begin rather than __end_rodata + * to cover NOTES and EXCEPTION_TABLE. 
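A hypothetical caller of the reworked create_pgd_mapping() above, now that it rejects init_mm and takes an explicit allow_block_mappings flag; the efi_mm and size names are illustrative only.

	/* map a runtime-services region into a non-init mm, page mappings only */
	create_pgd_mapping(&efi_mm, phys_base, virt_base, map_size,
			   PAGE_KERNEL, false);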
*/ - section_size = (unsigned long)_etext - (unsigned long)__start_rodata; + section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata, section_size, PAGE_KERNEL_RO); } @@ -481,7 +419,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, BUG_ON(!PAGE_ALIGNED(size)); __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, - early_pgtable_alloc); + early_pgtable_alloc, !debug_pagealloc_enabled()); vma->addr = va_start; vma->phys_addr = pa_start; @@ -499,8 +437,8 @@ static void __init map_kernel(pgd_t *pgd) { static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data; - map_kernel_segment(pgd, _text, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text); - map_kernel_segment(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata); + map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text); + map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata); map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, &vmlinux_init); map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 98dc1047f2a2..c7fe3ec70774 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -17,6 +17,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -29,7 +30,7 @@ static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; static int numa_distance_cnt; static u8 *numa_distance; -static int numa_off; +static bool numa_off; static __init int numa_parse_early_param(char *opt) { @@ -37,7 +38,7 @@ static __init int numa_parse_early_param(char *opt) return -EINVAL; if (!strncmp(opt, "off", 3)) { pr_info("%s\n", "NUMA turned off"); - numa_off = 1; + numa_off = true; } return 0; } @@ -131,25 +132,25 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid) * numa_add_memblk - Set node id to memblk * @nid: NUMA node ID of the new memblk * @start: Start address of the new memblk - * @size: Size of the new memblk + * @end: End address of the new memblk * * RETURNS: * 0 on success, -errno on failure. */ -int __init numa_add_memblk(int nid, u64 start, u64 size) +int __init numa_add_memblk(int nid, u64 start, u64 end) { int ret; - ret = memblock_set_node(start, size, &memblock.memory, nid); + ret = memblock_set_node(start, (end - start), &memblock.memory, nid); if (ret < 0) { pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n", - start, (start + size - 1), nid); + start, (end - 1), nid); return ret; } node_set(nid, numa_nodes_parsed); pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n", - start, (start + size - 1), nid); + start, (end - 1), nid); return ret; } @@ -362,12 +363,15 @@ static int __init dummy_numa_init(void) int ret; struct memblock_region *mblk; - pr_info("%s\n", "No NUMA configuration found"); + if (numa_off) + pr_info("NUMA disabled\n"); /* Forced off on command line. 
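The numa_add_memblk() interface change above in one line: callers now pass an exclusive end address instead of a size (addresses made up for the example).

	numa_add_memblk(0, 0x80000000ULL, 0x100000000ULL); /* [2GiB, 4GiB) on node 0 */
	/* before this patch the third argument was the size, 0x80000000ULL */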
*/ + else + pr_info("No NUMA configuration found\n"); pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n", 0LLU, PFN_PHYS(max_pfn) - 1); for_each_memblock(memory, mblk) { - ret = numa_add_memblk(0, mblk->base, mblk->size); + ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); if (!ret) continue; @@ -375,7 +379,7 @@ static int __init dummy_numa_init(void) return ret; } - numa_off = 1; + numa_off = true; return 0; } @@ -388,7 +392,9 @@ static int __init dummy_numa_init(void) void __init arm64_numa_init(void) { if (!numa_off) { - if (!numa_init(of_numa_init)) + if (!acpi_disabled && !numa_init(arm64_acpi_numa_init)) + return; + if (acpi_disabled && !numa_init(of_numa_init)) return; } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c4317879b938..5bb61de23201 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -180,6 +180,8 @@ ENTRY(__cpu_setup) msr cpacr_el1, x0 // Enable FP/ASIMD mov x0, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x0 // access to the DCC from EL0 + isb // Unmask debug exceptions now, + enable_dbg // since this is per-cpu reset_pmuserenr_el0 x0 // Disable PMU access from EL0 /* * Memory region attributes for LPAE: diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index aee5637ea436..7c16e547ccb2 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -1,7 +1,7 @@ /* * BPF JIT compiler for ARM64 * - * Copyright (C) 2014-2015 Zi Shen Lim + * Copyright (C) 2014-2016 Zi Shen Lim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -55,6 +55,7 @@ #define A64_BL(imm26) A64_BRANCH((imm26) << 2, LINK) /* Unconditional branch (register) */ +#define A64_BR(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_NOLINK) #define A64_BLR(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_LINK) #define A64_RET(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_RETURN) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 49ba37e4bfc0..b2fc97a2c56c 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -18,6 +18,7 @@ #define pr_fmt(fmt) "bpf_jit: " fmt +#include #include #include #include @@ -33,6 +34,7 @@ int bpf_jit_enable __read_mostly; #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) +#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Map BPF registers to A64 registers */ static const int bpf2a64[] = { @@ -54,6 +56,8 @@ static const int bpf2a64[] = { /* temporary registers for internal BPF JIT */ [TMP_REG_1] = A64_R(10), [TMP_REG_2] = A64_R(11), + /* tail_call_cnt */ + [TCALL_CNT] = A64_R(26), /* temporary register for blinding constants */ [BPF_REG_AX] = A64_R(9), }; @@ -146,13 +150,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) #define STACK_SIZE STACK_ALIGN(_STACK_SIZE) -static void build_prologue(struct jit_ctx *ctx) +#define PROLOGUE_OFFSET 8 + +static int build_prologue(struct jit_ctx *ctx) { const u8 r6 = bpf2a64[BPF_REG_6]; const u8 r7 = bpf2a64[BPF_REG_7]; const u8 r8 = bpf2a64[BPF_REG_8]; const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; + const u8 tcc = bpf2a64[TCALL_CNT]; + const int idx0 = ctx->idx; + int cur_offset; /* * BPF prog stack layout @@ -162,8 +171,6 @@ static void build_prologue(struct jit_ctx *ctx) * |FP/LR| * current A64_FP => -16:+-----+ * | ... | callee saved registers - * +-----+ - * | | x25/x26 * BPF fp register => -64:+-----+ <= (BPF_FP) * | | * | ... 
| BPF prog stack @@ -183,18 +190,90 @@ static void build_prologue(struct jit_ctx *ctx) emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); emit(A64_MOV(1, A64_FP, A64_SP), ctx); - /* Save callee-saved register */ + /* Save callee-saved registers */ emit(A64_PUSH(r6, r7, A64_SP), ctx); emit(A64_PUSH(r8, r9, A64_SP), ctx); + emit(A64_PUSH(fp, tcc, A64_SP), ctx); - /* Save fp (x25) and x26. SP requires 16 bytes alignment */ - emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx); - - /* Set up BPF prog stack base register (x25) */ + /* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); + /* Initialize tail_call_cnt */ + emit(A64_MOVZ(1, tcc, 0, 0), ctx); + /* Set up function call stack */ emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); + + cur_offset = ctx->idx - idx0; + if (cur_offset != PROLOGUE_OFFSET) { + pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", + cur_offset, PROLOGUE_OFFSET); + return -1; + } + return 0; +} + +static int out_offset = -1; /* initialized on the first pass of build_body() */ +static int emit_bpf_tail_call(struct jit_ctx *ctx) +{ + /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ + const u8 r2 = bpf2a64[BPF_REG_2]; + const u8 r3 = bpf2a64[BPF_REG_3]; + + const u8 tmp = bpf2a64[TMP_REG_1]; + const u8 prg = bpf2a64[TMP_REG_2]; + const u8 tcc = bpf2a64[TCALL_CNT]; + const int idx0 = ctx->idx; +#define cur_offset (ctx->idx - idx0) +#define jmp_offset (out_offset - (cur_offset)) + size_t off; + + /* if (index >= array->map.max_entries) + * goto out; + */ + off = offsetof(struct bpf_array, map.max_entries); + emit_a64_mov_i64(tmp, off, ctx); + emit(A64_LDR32(tmp, r2, tmp), ctx); + emit(A64_CMP(0, r3, tmp), ctx); + emit(A64_B_(A64_COND_GE, jmp_offset), ctx); + + /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + * tail_call_cnt++; + */ + emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); + emit(A64_CMP(1, tcc, tmp), ctx); + emit(A64_B_(A64_COND_GT, jmp_offset), ctx); + emit(A64_ADD_I(1, tcc, tcc, 1), ctx); + + /* prog = array->ptrs[index]; + * if (prog == NULL) + * goto out; + */ + off = offsetof(struct bpf_array, ptrs); + emit_a64_mov_i64(tmp, off, ctx); + emit(A64_LDR64(tmp, r2, tmp), ctx); + emit(A64_LDR64(prg, tmp, r3), ctx); + emit(A64_CBZ(1, prg, jmp_offset), ctx); + + /* goto *(prog->bpf_func + prologue_size); */ + off = offsetof(struct bpf_prog, bpf_func); + emit_a64_mov_i64(tmp, off, ctx); + emit(A64_LDR64(tmp, prg, tmp), ctx); + emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); + emit(A64_BR(tmp), ctx); + + /* out: */ + if (out_offset == -1) + out_offset = cur_offset; + if (cur_offset != out_offset) { + pr_err_once("tail_call out_offset = %d, expected %d!\n", + cur_offset, out_offset); + return -1; + } + return 0; +#undef cur_offset +#undef jmp_offset } static void build_epilogue(struct jit_ctx *ctx) @@ -499,13 +578,15 @@ emit_cond_jmp: const u64 func = (u64)__bpf_call_base + imm; emit_a64_mov_i64(tmp, func, ctx); - emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); - emit(A64_MOV(1, A64_FP, A64_SP), ctx); emit(A64_BLR(tmp), ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); - emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); break; } + /* tail call */ + case BPF_JMP | BPF_CALL | BPF_X: + if (emit_bpf_tail_call(ctx)) + return -EFAULT; + break; /* function return */ case BPF_JMP | BPF_EXIT: /* Optimization: when last instruction is EXIT, @@ -650,11 +731,8 @@ emit_cond_jmp: emit_a64_mov_i64(r3, size, ctx); emit(A64_SUB_I(1, r4, fp, STACK_SIZE), ctx); emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx); - 
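A C-level sketch of the tail-call sequence the JIT emits above; this restates the comments already in the hunk rather than adding new semantics.

	if (index >= array->map.max_entries)
		goto out;
	if (tail_call_cnt > MAX_TAIL_CALL_CNT)
		goto out;
	tail_call_cnt++;

	prog = array->ptrs[index];
	if (prog == NULL)
		goto out;

	/* jump past the fixed-size prologue; the callee reuses this frame */
	target = (void *)prog->bpf_func + sizeof(u32) * PROLOGUE_OFFSET;
	goto *target;
out:	;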
emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); - emit(A64_MOV(1, A64_FP, A64_SP), ctx); emit(A64_BLR(r5), ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); - emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); jmp_offset = epilogue_offset(ctx); check_imm19(jmp_offset); @@ -780,7 +858,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_off; } - build_prologue(&ctx); + if (build_prologue(&ctx)) { + prog = orig_prog; + goto out_off; + } ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile index 74a8d87e542b..8ff8aa9c6228 100644 --- a/arch/arm64/xen/Makefile +++ b/arch/arm64/xen/Makefile @@ -1,2 +1,3 @@ xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o mm.o) obj-y := xen-arm.o hypercall.o +obj-$(CONFIG_XEN_EFI) += $(addprefix ../../arm/xen/, efi.o) diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 70df80e8da2c..329c8027b0a9 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -82,6 +82,7 @@ HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); +HYPERCALL2(vm_assist); ENTRY(privcmd_call) mov x16, x0 diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index d74fd8ce980a..3d5ce38a6f0b 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -41,21 +41,49 @@ static inline int __atomic_##op##_return(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, asm_op, asm_con) \ +static inline int __atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int result, val; \ + \ + asm volatile( \ + "/* atomic_fetch_" #op " */\n" \ + "1: ssrf 5\n" \ + " ld.w %0, %3\n" \ + " mov %1, %0\n" \ + " " #asm_op " %1, %4\n" \ + " stcond %2, %1\n" \ + " brne 1b" \ + : "=&r" (result), "=&r" (val), "=o" (v->counter) \ + : "m" (v->counter), #asm_con (i) \ + : "cc"); \ + \ + return result; \ +} + ATOMIC_OP_RETURN(sub, sub, rKs21) ATOMIC_OP_RETURN(add, add, r) +ATOMIC_FETCH_OP (sub, sub, rKs21) +ATOMIC_FETCH_OP (add, add, r) -#define ATOMIC_OP(op, asm_op) \ +#define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP_RETURN(op, asm_op, r) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ (void)__atomic_##op##_return(i, v); \ +} \ +ATOMIC_FETCH_OP(op, asm_op, r) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __atomic_fetch_##op(i, v); \ } -ATOMIC_OP(and, and) -ATOMIC_OP(or, or) -ATOMIC_OP(xor, eor) +ATOMIC_OPS(and, and) +ATOMIC_OPS(or, or) +ATOMIC_OPS(xor, eor) -#undef ATOMIC_OP +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN /* @@ -87,6 +115,14 @@ static inline int atomic_add_return(int i, atomic_t *v) return __atomic_add_return(i, v); } +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + if (IS_21BIT_CONST(i)) + return __atomic_fetch_sub(-i, v); + + return __atomic_fetch_add(i, v); +} + /* * atomic_sub_return - subtract the atomic variable * @i: integer value to subtract @@ -102,6 +138,14 @@ static inline int atomic_sub_return(int i, atomic_t *v) return __atomic_add_return(-i, v); } +static inline int atomic_fetch_sub(int i, atomic_t *v) +{ + if (IS_21BIT_CONST(i)) + return __atomic_fetch_sub(i, v); + + return __atomic_fetch_add(-i, v); +} + /* * __atomic_add_unless - add unless the number is a given value * @v: pointer of type atomic_t diff --git a/arch/avr32/include/asm/pgalloc.h b/arch/avr32/include/asm/pgalloc.h index 1aba19d68c5e..db039cb368be 100644 --- a/arch/avr32/include/asm/pgalloc.h +++ b/arch/avr32/include/asm/pgalloc.h @@ 
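The semantics every architecture implements in this atomic series, shown once: atomic_fetch_add() returns the value the counter held before the operation, while the existing atomic_add_return() returns the new value.

	atomic_t v = ATOMIC_INIT(1);

	int old = atomic_fetch_add(2, &v);	/* old == 1, counter now 3 */
	int new = atomic_add_return(2, &v);	/* new == 5, counter now 5 */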
-43,7 +43,7 @@ static inline void pgd_ctor(void *x) */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return quicklist_alloc(QUICK_PGD, GFP_KERNEL | __GFP_REPEAT, pgd_ctor); + return quicklist_alloc(QUICK_PGD, GFP_KERNEL, pgd_ctor); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -54,7 +54,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -63,7 +63,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, struct page *page; void *pg; - pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); if (!pg) return NULL; diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index c03533937a9f..a4b7edac8f10 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -134,7 +134,7 @@ good_area: * sure we exit gracefully rather than endlessly redo the * fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index 1c1c42330c99..63c7deceeeb6 100644 --- a/arch/blackfin/include/asm/atomic.h +++ b/arch/blackfin/include/asm/atomic.h @@ -17,6 +17,7 @@ asmlinkage int __raw_uncached_fetch_asm(const volatile int *ptr); asmlinkage int __raw_atomic_add_asm(volatile int *ptr, int value); +asmlinkage int __raw_atomic_xadd_asm(volatile int *ptr, int value); asmlinkage int __raw_atomic_and_asm(volatile int *ptr, int value); asmlinkage int __raw_atomic_or_asm(volatile int *ptr, int value); @@ -28,10 +29,17 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value); #define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i) #define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i)) +#define atomic_fetch_add(i, v) __raw_atomic_xadd_asm(&(v)->counter, i) +#define atomic_fetch_sub(i, v) __raw_atomic_xadd_asm(&(v)->counter, -(i)) + #define atomic_or(i, v) (void)__raw_atomic_or_asm(&(v)->counter, i) #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i) #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i) +#define atomic_fetch_or(i, v) __raw_atomic_or_asm(&(v)->counter, i) +#define atomic_fetch_and(i, v) __raw_atomic_and_asm(&(v)->counter, i) +#define atomic_fetch_xor(i, v) __raw_atomic_xor_asm(&(v)->counter, i) + #endif #include diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index 490c7caa02d9..c58f4a83ed6f 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -12,6 +12,8 @@ #else #include +#include +#include asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr); asmlinkage void __raw_spin_lock_asm(volatile int *ptr); @@ -48,8 +50,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { - while (arch_spin_is_locked(lock)) - cpu_relax(); + smp_cond_load_acquire(&lock->lock, !VAL); } static inline int arch_read_can_lock(arch_rwlock_t *rw) diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c index a401c27b69b4..68096e8f787f 100644 --- 
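A rough expansion of the new unlock-wait idiom adopted above; the real smp_cond_load_acquire() is a macro in which VAL names each freshly loaded value.

	for (;;) {
		int val = READ_ONCE(lock->lock);
		if (!val)			/* the "!VAL" condition */
			break;
		cpu_relax();
	}
	smp_acquire__after_ctrl_dep();	/* give the exit load acquire semantics */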
a/arch/blackfin/kernel/bfin_ksyms.c +++ b/arch/blackfin/kernel/bfin_ksyms.c @@ -84,6 +84,7 @@ EXPORT_SYMBOL(insl_16); #ifdef CONFIG_SMP EXPORT_SYMBOL(__raw_atomic_add_asm); +EXPORT_SYMBOL(__raw_atomic_xadd_asm); EXPORT_SYMBOL(__raw_atomic_and_asm); EXPORT_SYMBOL(__raw_atomic_or_asm); EXPORT_SYMBOL(__raw_atomic_xor_asm); diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S index 26fccb5568b9..1e2989c5d6b2 100644 --- a/arch/blackfin/mach-bf561/atomic.S +++ b/arch/blackfin/mach-bf561/atomic.S @@ -605,6 +605,28 @@ ENTRY(___raw_atomic_add_asm) rts; ENDPROC(___raw_atomic_add_asm) +/* + * r0 = ptr + * r1 = value + * + * ADD a signed value to a 32bit word and return the old value atomically. + * Clobbers: r3:0, p1:0 + */ +ENTRY(___raw_atomic_xadd_asm) + p1 = r0; + r3 = r1; + [--sp] = rets; + call _get_core_lock; + r3 = [p1]; + r2 = r3 + r2; + [p1] = r2; + r1 = p1; + call _put_core_lock; + r0 = r3; + rets = [sp++]; + rts; +ENDPROC(___raw_atomic_add_asm) + /* * r0 = ptr * r1 = mask @@ -618,10 +640,9 @@ ENTRY(___raw_atomic_and_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 & r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 & r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; @@ -642,10 +663,9 @@ ENTRY(___raw_atomic_or_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 | r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 | r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; @@ -666,10 +686,9 @@ ENTRY(___raw_atomic_xor_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 ^ r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 ^ r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c index aad5d7416886..9231e5a72b93 100644 --- a/arch/blackfin/mach-bf609/boards/ezkit.c +++ b/arch/blackfin/mach-bf609/boards/ezkit.c @@ -1002,14 +1002,12 @@ static struct adv7842_output_format adv7842_opf[] = { { .op_ch_sel = ADV7842_OP_CH_SEL_BRG, .op_format_sel = ADV7842_OP_FORMAT_SEL_SDR_ITU656_8, - .op_656_range = 1, .blank_data = 1, .insert_av_codes = 1, }, { .op_ch_sel = ADV7842_OP_CH_SEL_RGB, .op_format_sel = ADV7842_OP_FORMAT_SEL_SDR_ITU656_16, - .op_656_range = 1, .blank_data = 1, }, }; diff --git a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h index 235ece437ddd..42f1affb9c2d 100644 --- a/arch/cris/include/asm/pgalloc.h +++ b/arch/cris/include/asm/pgalloc.h @@ -24,14 +24,14 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 3066d40a6db1..112ef26c7f2e 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -168,7 +168,7 @@ retry: * the fault. 
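Context for the flag removals above: __GFP_REPEAT only ever affected costly higher-order allocations, so for the order-0 page-table pages allocated here it was dead weight. The calls simply become:

	pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);	/* order 0 */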
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 64f02d451aa8..1c2a5e264fc7 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -60,16 +60,6 @@ static inline int atomic_add_negative(int i, atomic_t *v) return atomic_add_return(i, v) < 0; } -static inline void atomic_add(int i, atomic_t *v) -{ - atomic_add_return(i, v); -} - -static inline void atomic_sub(int i, atomic_t *v) -{ - atomic_sub_return(i, v); -} - static inline void atomic_inc(atomic_t *v) { atomic_inc_return(v); @@ -136,16 +126,6 @@ static inline long long atomic64_add_negative(long long i, atomic64_t *v) return atomic64_add_return(i, v) < 0; } -static inline void atomic64_add(long long i, atomic64_t *v) -{ - atomic64_add_return(i, v); -} - -static inline void atomic64_sub(long long i, atomic64_t *v) -{ - atomic64_sub_return(i, v); -} - static inline void atomic64_inc(atomic64_t *v) { atomic64_inc_return(v); @@ -182,11 +162,19 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) } #define ATOMIC_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __atomic32_fetch_##op(i, &v->counter); \ +} \ static inline void atomic_##op(int i, atomic_t *v) \ { \ (void)__atomic32_fetch_##op(i, &v->counter); \ } \ \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + return __atomic64_fetch_##op(i, &v->counter); \ +} \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ (void)__atomic64_fetch_##op(i, &v->counter); \ @@ -195,6 +183,8 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ ATOMIC_OP(or) ATOMIC_OP(and) ATOMIC_OP(xor) +ATOMIC_OP(add) +ATOMIC_OP(sub) #undef ATOMIC_OP diff --git a/arch/frv/include/asm/atomic_defs.h b/arch/frv/include/asm/atomic_defs.h index 36e126d2f801..d4912c88b829 100644 --- a/arch/frv/include/asm/atomic_defs.h +++ b/arch/frv/include/asm/atomic_defs.h @@ -162,6 +162,8 @@ ATOMIC_EXPORT(__atomic64_fetch_##op); ATOMIC_FETCH_OP(or) ATOMIC_FETCH_OP(and) ATOMIC_FETCH_OP(xor) +ATOMIC_FETCH_OP(add) +ATOMIC_FETCH_OP(sub) ATOMIC_OP_RETURN(add) ATOMIC_OP_RETURN(sub) diff --git a/arch/frv/include/asm/serial.h b/arch/frv/include/asm/serial.h index bce0d0d07e60..614c6d76789a 100644 --- a/arch/frv/include/asm/serial.h +++ b/arch/frv/include/asm/serial.h @@ -12,7 +12,3 @@ * the base baud is derived from the clock speed and so is variable */ #define BASE_BAUD 0 - -#define STD_COM_FLAGS UPF_BOOT_AUTOCONF - -#define SERIAL_PORT_DFNS diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index 61d99767fe16..614a46c413d2 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -164,7 +164,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear * make sure we exit gracefully rather than endlessly redo * the fault. 
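The tree-wide signature change being applied across these fault handlers: handle_mm_fault() now derives the mm from vma->vm_mm, so every caller drops the first argument.

	fault = handle_mm_fault(vma, address, flags);	/* was (mm, vma, address, flags) */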
*/ - fault = handle_mm_fault(mm, vma, ear0, flags); + fault = handle_mm_fault(vma, ear0, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index 41907d25ed38..c9ed14f6c67d 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c @@ -22,7 +22,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE))); pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL); if (pte) clear_page(pte); return pte; @@ -33,9 +33,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *page; #ifdef CONFIG_HIGHPTE - page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); + page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0); #else - page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + page = alloc_pages(GFP_KERNEL, 0); #endif if (!page) return NULL; diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 4435a445ae7e..349a47a918db 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -28,6 +28,19 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + h8300flags flags; \ + int ret; \ + \ + flags = arch_local_irq_save(); \ + ret = v->counter; \ + v->counter c_op i; \ + arch_local_irq_restore(flags); \ + return ret; \ +} + #define ATOMIC_OP(op, c_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ @@ -41,17 +54,21 @@ static inline void atomic_##op(int i, atomic_t *v) \ ATOMIC_OP_RETURN(add, +=) ATOMIC_OP_RETURN(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) +ATOMIC_OPS(add, +=) +ATOMIC_OPS(sub, -=) +#undef ATOMIC_OPS #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define atomic_add(i, v) (void)atomic_add_return(i, v) #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) - -#define atomic_sub(i, v) (void)atomic_sub_return(i, v) #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) #define atomic_inc_return(v) atomic_add_return(1, v) diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 57298e7b4867..1941e4baaee6 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -8,8 +8,7 @@ config HEXAGON # select HAVE_REGS_AND_STACK_ACCESS_API # select HAVE_HW_BREAKPOINT if PERF_EVENTS # select ARCH_HAS_CPU_IDLE_WAIT - # select ARCH_WANT_OPTIONAL_GPIOLIB - # select ARCH_REQUIRE_GPIOLIB + # select GPIOLIB # select HAVE_CLK # select GENERIC_PENDING_IRQ if SMP select GENERIC_ATOMIC64 diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 55696c4100d4..a62ba368b27d 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -110,7 +110,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ ); \ } \ -#define ATOMIC_OP_RETURN(op) \ +#define ATOMIC_OP_RETURN(op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ int output; \ @@ -127,16 +127,37 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return output; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t 
*v) \ +{ \ + int output, val; \ + \ + __asm__ __volatile__ ( \ + "1: %0 = memw_locked(%2);\n" \ + " %1 = "#op "(%0,%3);\n" \ + " memw_locked(%2,P3)=%1;\n" \ + " if !P3 jump 1b;\n" \ + : "=&r" (output), "=&r" (val) \ + : "r" (&v->counter), "r" (i) \ + : "memory", "p3" \ + ); \ + return output; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index 77da3b0ae3c2..eeebf862c46c 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -64,7 +64,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + pte = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { @@ -78,7 +78,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO; return (pte_t *) __get_free_page(flags); } diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index 12ca4ebc0338..a1c55788c5d6 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -23,6 +23,8 @@ #define _ASM_SPINLOCK_H #include +#include +#include /* * This file is pulled in for SMP builds. @@ -176,8 +178,12 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) * SMP spinlocks are intended to allow only a single CPU at the lock */ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(lock) \ - do {while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} + #define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index 8704c9320032..bd7c251e2bce 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -101,7 +101,7 @@ good_area: break; } - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index f80758cb7157..6a15083cc366 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -39,13 +39,12 @@ config IA64 select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW select GENERIC_IRQ_LEGACY - select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP select GENERIC_SMP_IDLE_THREAD select ARCH_INIT_TASK select ARCH_TASK_STRUCT_ALLOCATOR - select ARCH_THREAD_INFO_ALLOCATOR + select ARCH_THREAD_STACK_ALLOCATOR select ARCH_CLOCKSOURCE_DATA select GENERIC_TIME_VSYSCALL_OLD select SYSCTL_ARCH_UNALIGN_NO_WARN diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index aa0fdf125aba..a3d0211970e9 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -140,6 +140,9 @@ static inline void 
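The shape shared by the ia64 fetch ops in the hunks that follow (and by the hexagon LL/SC loop above): build the new value, try to publish it, and hand back the old value on success. A generic cmpxchg-based sketch:

static inline int fetch_add_via_cmpxchg(atomic_t *v, int i)
{
	int old, new;

	do {
		old = atomic_read(v);
		new = old + i;
	} while (atomic_cmpxchg(v, old, new) != old);

	return old;	/* fetch_* returns the pre-operation value */
}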
per_cpu_scan_finalize(int min_cpus, int reserve_cpus) } } } + +extern void acpi_numa_fixup(void); + #endif /* CONFIG_ACPI_NUMA */ #endif /*__KERNEL__*/ diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 8dfb5f6f6c35..f565ad376142 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -42,8 +42,27 @@ ia64_atomic_##op (int i, atomic_t *v) \ return new; \ } -ATOMIC_OP(add, +) -ATOMIC_OP(sub, -) +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int \ +ia64_atomic_fetch_##op (int i, atomic_t *v) \ +{ \ + __s32 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \ + return old; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(add, +) +ATOMIC_OPS(sub, -) #define atomic_add_return(i,v) \ ({ \ @@ -69,14 +88,44 @@ ATOMIC_OP(sub, -) : ia64_atomic_sub(__ia64_asr_i, v); \ }) -ATOMIC_OP(and, &) -ATOMIC_OP(or, |) -ATOMIC_OP(xor, ^) +#define atomic_fetch_add(i,v) \ +({ \ + int __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_add(__ia64_aar_i, v); \ +}) + +#define atomic_fetch_sub(i,v) \ +({ \ + int __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? 
ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_sub(__ia64_asr_i, v); \ +}) + +ATOMIC_FETCH_OP(and, &) +ATOMIC_FETCH_OP(or, |) +ATOMIC_FETCH_OP(xor, ^) + +#define atomic_and(i,v) (void)ia64_atomic_fetch_and(i,v) +#define atomic_or(i,v) (void)ia64_atomic_fetch_or(i,v) +#define atomic_xor(i,v) (void)ia64_atomic_fetch_xor(i,v) -#define atomic_and(i,v) (void)ia64_atomic_and(i,v) -#define atomic_or(i,v) (void)ia64_atomic_or(i,v) -#define atomic_xor(i,v) (void)ia64_atomic_xor(i,v) +#define atomic_fetch_and(i,v) ia64_atomic_fetch_and(i,v) +#define atomic_fetch_or(i,v) ia64_atomic_fetch_or(i,v) +#define atomic_fetch_xor(i,v) ia64_atomic_fetch_xor(i,v) +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP #define ATOMIC64_OP(op, c_op) \ @@ -94,8 +143,27 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \ return new; \ } -ATOMIC64_OP(add, +) -ATOMIC64_OP(sub, -) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ long \ +ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \ +{ \ + __s64 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic64_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \ + return old; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(add, +) +ATOMIC64_OPS(sub, -) #define atomic64_add_return(i,v) \ ({ \ @@ -121,14 +189,44 @@ ATOMIC64_OP(sub, -) : ia64_atomic64_sub(__ia64_asr_i, v); \ }) -ATOMIC64_OP(and, &) -ATOMIC64_OP(or, |) -ATOMIC64_OP(xor, ^) +#define atomic64_fetch_add(i,v) \ +({ \ + long __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_add(__ia64_aar_i, v); \ +}) + +#define atomic64_fetch_sub(i,v) \ +({ \ + long __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? 
ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \ +}) + +ATOMIC64_FETCH_OP(and, &) +ATOMIC64_FETCH_OP(or, |) +ATOMIC64_FETCH_OP(xor, ^) + +#define atomic64_and(i,v) (void)ia64_atomic64_fetch_and(i,v) +#define atomic64_or(i,v) (void)ia64_atomic64_fetch_or(i,v) +#define atomic64_xor(i,v) (void)ia64_atomic64_fetch_xor(i,v) -#define atomic64_and(i,v) (void)ia64_atomic64_and(i,v) -#define atomic64_or(i,v) (void)ia64_atomic64_or(i,v) -#define atomic64_xor(i,v) (void)ia64_atomic64_xor(i,v) +#define atomic64_fetch_and(i,v) ia64_atomic64_fetch_and(i,v) +#define atomic64_fetch_or(i,v) ia64_atomic64_fetch_or(i,v) +#define atomic64_fetch_xor(i,v) ia64_atomic64_fetch_xor(i,v) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h index f41e66d65e31..28cb819e0ff9 100644 --- a/arch/ia64/include/asm/mutex.h +++ b/arch/ia64/include/asm/mutex.h @@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (cmpxchg_acq(count, 1, 0) == 1) + if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1) return 1; return 0; } diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index 8b23e070b844..8fa98dd303b4 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -40,7 +40,7 @@ static inline void __down_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1); + long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1); if (result < 0) rwsem_down_read_failed(sem); @@ -55,9 +55,9 @@ ___down_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old + RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_acq(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old); return old; } @@ -85,7 +85,7 @@ __down_write_killable (struct rw_semaphore *sem) static inline void __up_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1); + long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1); if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -100,9 +100,9 @@ __up_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -115,8 +115,8 @@ static inline int __down_read_trylock (struct rw_semaphore *sem) { long tmp; - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) { + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) { return 1; } } @@ -129,8 +129,8 @@ __down_read_trylock (struct rw_semaphore *sem) static inline int __down_write_trylock (struct rw_semaphore *sem) { - long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); + long tmp = atomic_long_cmpxchg_acquire(&sem->count, + RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); return tmp == RWSEM_UNLOCKED_VALUE; } @@ -143,19 +143,12 @@ 
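The mutex trylock tweak above is a common contention optimisation: a plain load filters out the already-locked case before the cmpxchg, so CPUs that would fail anyway never take the cache line exclusive.

	if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1)
		return 1;	/* fastpath acquired */
	return 0;		/* visibly locked: no RMW attempted */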
__downgrade_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_WAITING_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (old < 0) rwsem_downgrade_wake(sem); } -/* - * Implement atomic add functionality. These used to be "inline" functions, but GCC v3.1 - * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd. - */ -#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) -#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) - #endif /* _ASM_IA64_RWSEM_H */ diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 45698cd15b7b..ca9e76149a4a 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -15,6 +15,8 @@ #include #include +#include +#include #define arch_spin_lock_init(x) ((x)->lock = 0) @@ -86,6 +88,8 @@ static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock) return; cpu_relax(); } + + smp_acquire__after_ctrl_dep(); } static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index aa995b67c3f5..d1212b84fb83 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -48,15 +48,15 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_info_node(tsk, node) \ - ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#define alloc_thread_stack_node(tsk, node) \ + ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_info_node(tsk, node) ((struct thread_info *) 0) +#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_info(ti) /* nothing */ +#define free_thread_stack(ti) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 39d64e0df1de..77e541cf0e5d 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -205,17 +205,18 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) * must be delayed until after the TLB has been flushed (see comments at the beginning of * this file). 
*/ -static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { + if (tlb->nr == tlb->max) + return true; + tlb->need_flush = 1; if (!tlb->nr && tlb->pages == tlb->local) __tlb_alloc_page(tlb); tlb->pages[tlb->nr++] = page; - VM_BUG_ON(tlb->nr > tlb->max); - - return tlb->max - tlb->nr; + return false; } static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) @@ -235,8 +236,28 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { - if (!__tlb_remove_page(tlb, page)) + if (__tlb_remove_page(tlb, page)) { tlb_flush_mmu(tlb); + __tlb_remove_page(tlb, page); + } +} + +static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return __tlb_remove_page(tlb, page); +} + +static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, + struct page *page) +{ + return __tlb_remove_page(tlb, page); +} + +static inline void tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return tlb_remove_page(tlb, page); } /* diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index b1698bc042c8..92b7bc956795 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -524,7 +524,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) return 0; } -void __init acpi_numa_arch_fixup(void) +void __init acpi_numa_fixup(void) { int i, j, node_from, node_to; diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index f9efe9739d3f..0eaa89f3defd 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -26,6 +26,7 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * handled. This is done by having a special ".data..init_task" section... */ #define init_thread_info init_task_mem.s.thread_info +#define init_stack init_task_mem.stack union { struct { diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 2029a38a72ae..afddb3e80a29 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -552,6 +552,7 @@ setup_arch (char **cmdline_p) early_acpi_boot_init(); # ifdef CONFIG_ACPI_NUMA acpi_numa_init(); + acpi_numa_fixup(); # ifdef CONFIG_ACPI_HOTPLUG_CPU prefill_possible_map(); # endif diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 70b40d1205a6..fa6ad95e992e 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -159,7 +159,7 @@ retry: * sure we exit gracefully rather than endlessly redo the * fault. 
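
The __tlb_remove_page() conversion above inverts the return convention: instead of reporting remaining batch capacity, it now returns true when the batch is already full and the page was not queued, pushing the flush-and-retry into the caller. The caller-side pattern, restated as a commented sketch mirroring tlb_remove_page() above:

	/* sketch of the new bool contract */
	static inline void example_remove(struct mmu_gather *tlb,
					  struct page *page)
	{
		if (__tlb_remove_page(tlb, page)) {	/* true: batch full,  */
							/* page NOT queued    */
			tlb_flush_mmu(tlb);		/* drain and flush    */
			__tlb_remove_page(tlb, page);	/* retry; cannot fail */
							/* on an empty batch  */
		}
	}
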
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/m32r/boot/compressed/m32r_sio.c b/arch/m32r/boot/compressed/m32r_sio.c index 01d877c6868f..cf3023dced49 100644 --- a/arch/m32r/boot/compressed/m32r_sio.c +++ b/arch/m32r/boot/compressed/m32r_sio.c @@ -8,12 +8,13 @@ #include -static void putc(char c); +static void m32r_putc(char c); static int puts(const char *s) { char c; - while ((c = *s++)) putc(c); + while ((c = *s++)) + m32r_putc(c); return 0; } @@ -41,7 +42,7 @@ static int puts(const char *s) #define BOOT_SIO0TXB PLD_ESIO0TXB #endif -static void putc(char c) +static void m32r_putc(char c) { while ((*BOOT_SIO0STS & 0x3) != 0x3) cpu_relax(); @@ -61,7 +62,7 @@ static void putc(char c) #define SIO0TXB (volatile unsigned short *)(0x00efd000 + 30) #endif -static void putc(char c) +static void m32r_putc(char c) { while ((*SIO0STS & 0x1) == 0) cpu_relax(); diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index ea35160d632b..640cc1c7099f 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -89,16 +89,44 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int result, val; \ + \ + local_irq_save(flags); \ + __asm__ __volatile__ ( \ + "# atomic_fetch_" #op " \n\t" \ + DCACHE_CLEAR("%0", "r4", "%2") \ + M32R_LOCK" %1, @%2; \n\t" \ + "mv %0, %1 \n\t" \ + #op " %1, %3; \n\t" \ + M32R_UNLOCK" %1, @%2; \n\t" \ + : "=&r" (result), "=&r" (val) \ + : "r" (&v->counter), "r" (i) \ + : "memory" \ + __ATOMIC_CLOBBER \ + ); \ + local_irq_restore(flags); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index fa13694eaae3..323c7fc953cd 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include /* * Your basic SMP spinlocks, allowing only a single CPU anywhere @@ -27,8 +29,11 @@ #define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, VAL > 0); +} /** * arch_spin_trylock - Try spin lock and return a result diff --git a/arch/m32r/kernel/m32r_ksyms.c b/arch/m32r/kernel/m32r_ksyms.c index b727e693c805..23f26f4adfff 100644 --- a/arch/m32r/kernel/m32r_ksyms.c +++ b/arch/m32r/kernel/m32r_ksyms.c @@ -41,6 +41,9 @@ EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL(smp_flush_tlb_page); #endif +extern int __ucmpdi2(unsigned long long a, unsigned long long b); +EXPORT_SYMBOL(__ucmpdi2); + /* compiler generated symbol */ extern void __ashldi3(void); extern void __ashrdi3(void); diff --git a/arch/m32r/lib/Makefile b/arch/m32r/lib/Makefile index 
d16b4e40d1ae..5889eb9610b5 100644 --- a/arch/m32r/lib/Makefile +++ b/arch/m32r/lib/Makefile @@ -3,5 +3,5 @@ # lib-y := checksum.o ashxdi3.o memset.o memcpy.o \ - delay.o strlen.o usercopy.o csum_partial_copy.o - + delay.o strlen.o usercopy.o csum_partial_copy.o \ + ucmpdi2.o diff --git a/arch/m32r/lib/libgcc.h b/arch/m32r/lib/libgcc.h new file mode 100644 index 000000000000..267aa435bc35 --- /dev/null +++ b/arch/m32r/lib/libgcc.h @@ -0,0 +1,23 @@ +#ifndef __ASM_LIBGCC_H +#define __ASM_LIBGCC_H + +#include + +#ifdef __BIG_ENDIAN +struct DWstruct { + int high, low; +}; +#elif defined(__LITTLE_ENDIAN) +struct DWstruct { + int low, high; +}; +#else +#error I feel sick. +#endif + +typedef union { + struct DWstruct s; + long long ll; +} DWunion; + +#endif /* __ASM_LIBGCC_H */ diff --git a/arch/m32r/lib/ucmpdi2.c b/arch/m32r/lib/ucmpdi2.c new file mode 100644 index 000000000000..9d3c682c89b5 --- /dev/null +++ b/arch/m32r/lib/ucmpdi2.c @@ -0,0 +1,17 @@ +#include "libgcc.h" + +int __ucmpdi2(unsigned long long a, unsigned long long b) +{ + const DWunion au = {.ll = a}; + const DWunion bu = {.ll = b}; + + if ((unsigned int)au.s.high < (unsigned int)bu.s.high) + return 0; + else if ((unsigned int)au.s.high > (unsigned int)bu.s.high) + return 2; + if ((unsigned int)au.s.low < (unsigned int)bu.s.low) + return 0; + else if ((unsigned int)au.s.low > (unsigned int)bu.s.low) + return 2; + return 1; +} diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index 8f9875b7933d..a3785d3644c2 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -196,7 +196,7 @@ good_area: */ addr = (address & PAGE_MASK); set_thread_fault_code(error_code); - fault = handle_mm_fault(mm, vma, addr, flags); + fault = handle_mm_fault(vma, addr, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/m68k/coldfire/head.S b/arch/m68k/coldfire/head.S index fa31be297b85..73d92ea0ce65 100644 --- a/arch/m68k/coldfire/head.S +++ b/arch/m68k/coldfire/head.S @@ -288,7 +288,7 @@ _clear_bss: #endif /* - * Assember start up done, start code proper. + * Assembler start up done, start code proper. */ jsr start_kernel /* start Linux kernel */ diff --git a/arch/m68k/coldfire/m5272.c b/arch/m68k/coldfire/m5272.c index c525e4c08f84..9abb1a441da0 100644 --- a/arch/m68k/coldfire/m5272.c +++ b/arch/m68k/coldfire/m5272.c @@ -111,7 +111,7 @@ void __init config_BSP(char *commandp, int size) /***************************************************************************/ /* - * Some 5272 based boards have the FEC ethernet diectly connected to + * Some 5272 based boards have the FEC ethernet directly connected to * an ethernet switch. In this case we need to use the fixed phy type, * and we need to declare it early in boot. */ diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c index 821de928dc3f..6a640be48568 100644 --- a/arch/m68k/coldfire/pci.c +++ b/arch/m68k/coldfire/pci.c @@ -42,7 +42,7 @@ static unsigned long iospace; /* * We need to be carefull probing on bus 0 (directly connected to host - * bridge). We should only acccess the well defined possible devices in + * bridge). We should only access the well defined possible devices in * use, ignore aliases and the like. 
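
The new __ucmpdi2() above is gcc's libgcc helper for unsigned 64-bit comparison on 32-bit targets: it returns 0, 1 or 2 for a < b, a == b and a > b respectively. The kernel does not link against libgcc on m32r, so it carries its own copy and exports it for modules. A hedged example of source that can make a 32-bit compiler emit such a call (assumption: exact codegen depends on gcc version and optimization flags):

	/* a switch over a 64-bit value is a classic __ucmpdi2 trigger on
	 * 32-bit targets */
	int classify(unsigned long long x)
	{
		switch (x) {
		case 0:
			return 0;
		case 0x100000000ULL:	/* needs a full 64-bit compare */
			return 1;
		default:
			return 2;
		}
	}
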
*/ static unsigned char mcf_host_slot2sid[32] = { diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 3ee6976f6088..8f5b6f7dd136 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -359,6 +360,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -553,7 +555,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index e96787ffcbce..31bded9c83d4 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -341,6 +342,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -512,7 +514,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 083fe6beac14..0d7739e04ae2 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -350,6 +351,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -533,7 +535,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 475130c06dcb..2cbb5c465fec 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -340,6 +341,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 4339658c200f..96102a42c156 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # 
CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -341,6 +342,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -514,7 +516,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 831cc8c3a2e2..97d88f7dc5a7 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -357,6 +358,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -536,7 +538,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 6377afeb522b..be25ef208f0f 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -390,6 +391,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -616,7 +618,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 4304b3d56262..a008344360c9 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -339,6 +340,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 074bda4094ff..6735a25f36d4 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -340,6 +341,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -504,7 +506,9 @@ 
CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 07b9fa8d7f2e..780c6e9f6cf9 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -346,6 +347,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -527,7 +529,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 36e6fae02d45..44693cf361e5 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -337,6 +338,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -506,7 +508,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 903acf929511..ef0071d61158 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -337,6 +338,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -506,7 +508,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/ifpsp060/src/fpsp.S b/arch/m68k/ifpsp060/src/fpsp.S index 78cb60f5bb4d..9bbffebe3eb5 100644 --- a/arch/m68k/ifpsp060/src/fpsp.S +++ b/arch/m68k/ifpsp060/src/fpsp.S @@ -10191,7 +10191,7 @@ xdnrm_con: xdnrm_sd: mov.l %a1,-(%sp) tst.b LOCAL_EX(%a0) # is denorm pos or neg? - smi.b %d1 # set d0 accodingly + smi.b %d1 # set d0 accordingly bsr.l unf_sub mov.l (%sp)+,%a1 xdnrm_exit: @@ -10990,7 +10990,7 @@ src_qnan_m: # routines where an instruction is selected by an index into # a large jump table corresponding to a given instruction which # has been decoded. Flow continues here where we now decode -# further accoding to the source operand type. +# further according to the source operand type. # global fsinh @@ -23196,14 +23196,14 @@ m_sign: # # 1. Branch on the sign of the adjusted exponent. # 2p.(positive exp) -# 2. Check M16 and the digits in lwords 2 and 3 in decending order. +# 2. 
Check M16 and the digits in lwords 2 and 3 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Subtract the count from the exp. # 5. Check if the exp has crossed zero in #3 above; make the exp abs # and set SE. # 6. Multiply the mantissa by 10**count. # 2n.(negative exp) -# 2. Check the digits in lwords 3 and 2 in decending order. +# 2. Check the digits in lwords 3 and 2 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Add the count to the exp. # 5. Check if the exp has crossed zero in #3 above; clear SE. diff --git a/arch/m68k/ifpsp060/src/pfpsp.S b/arch/m68k/ifpsp060/src/pfpsp.S index 4aedef973cf6..3535e6c87eec 100644 --- a/arch/m68k/ifpsp060/src/pfpsp.S +++ b/arch/m68k/ifpsp060/src/pfpsp.S @@ -13156,14 +13156,14 @@ m_sign: # # 1. Branch on the sign of the adjusted exponent. # 2p.(positive exp) -# 2. Check M16 and the digits in lwords 2 and 3 in decending order. +# 2. Check M16 and the digits in lwords 2 and 3 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Subtract the count from the exp. # 5. Check if the exp has crossed zero in #3 above; make the exp abs # and set SE. # 6. Multiply the mantissa by 10**count. # 2n.(negative exp) -# 2. Check the digits in lwords 3 and 2 in decending order. +# 2. Check the digits in lwords 3 and 2 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Add the count to the exp. # 5. Check if the exp has crossed zero in #3 above; clear SE. diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 4858178260f9..cf4c3a7b1a45 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -53,6 +53,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return t; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int t, tmp; \ + \ + __asm__ __volatile__( \ + "1: movel %2,%1\n" \ + " " #asm_op "l %3,%1\n" \ + " casl %2,%1,%0\n" \ + " jne 1b" \ + : "+m" (*v), "=&d" (t), "=&d" (tmp) \ + : "g" (i), "2" (atomic_read(v))); \ + return tmp; \ +} + #else #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ @@ -68,20 +83,41 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return t; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned long flags; \ + int t; \ + \ + local_irq_save(flags); \ + t = v->counter; \ + v->counter c_op i; \ + local_irq_restore(flags); \ + \ + return t; \ +} + #endif /* CONFIG_RMW_INSNS */ #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -ATOMIC_OP(and, &=, and) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, eor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/m68k/include/asm/dma.h b/arch/m68k/include/asm/dma.h index 429fe26e320c..208b4daa14b3 100644 --- a/arch/m68k/include/asm/dma.h +++ b/arch/m68k/include/asm/dma.h @@ -18,7 +18,7 @@ * AUG/22/2000 : added support for 32-bit Dual-Address-Mode (K) 2000 * Oliver Kamphenkel (O.Kamphenkel@tu-bs.de) * - * AUG/25/2000 : addad 
support for 8, 16 and 32-bit Single-Address-Mode (K)2000 + * AUG/25/2000 : added support for 8, 16 and 32-bit Single-Address-Mode (K)2000 * Oliver Kamphenkel (O.Kamphenkel@tu-bs.de) * * APR/18/2002 : added proper support for MCF5272 DMA controller. diff --git a/arch/m68k/include/asm/m525xsim.h b/arch/m68k/include/asm/m525xsim.h index f186459072e9..699f20c8a0fe 100644 --- a/arch/m68k/include/asm/m525xsim.h +++ b/arch/m68k/include/asm/m525xsim.h @@ -123,10 +123,10 @@ /* * I2C module. */ -#define MCFI2C_BASE0 (MCF_MBAR + 0x280) /* Base addreess I2C0 */ +#define MCFI2C_BASE0 (MCF_MBAR + 0x280) /* Base address I2C0 */ #define MCFI2C_SIZE0 0x20 /* Register set size */ -#define MCFI2C_BASE1 (MCF_MBAR2 + 0x440) /* Base addreess I2C1 */ +#define MCFI2C_BASE1 (MCF_MBAR2 + 0x440) /* Base address I2C1 */ #define MCFI2C_SIZE1 0x20 /* Register set size */ /* diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index f9924fbcfe42..fb95aed5f428 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -14,7 +14,7 @@ extern const char bad_pmd_string[]; extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - unsigned long page = __get_free_page(GFP_DMA|__GFP_REPEAT); + unsigned long page = __get_free_page(GFP_DMA); if (!page) return NULL; @@ -51,7 +51,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page, static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_DMA|__GFP_REPEAT, 0); + struct page *page = alloc_pages(GFP_DMA, 0); pte_t *pte; if (!page) diff --git a/arch/m68k/include/asm/mcfmmu.h b/arch/m68k/include/asm/mcfmmu.h index 26cc3d5a63f8..8824236e303f 100644 --- a/arch/m68k/include/asm/mcfmmu.h +++ b/arch/m68k/include/asm/mcfmmu.h @@ -38,7 +38,7 @@ /* * MMU Operation register. */ -#define MMUOR_UAA 0x00000001 /* Update allocatiom address */ +#define MMUOR_UAA 0x00000001 /* Update allocation address */ #define MMUOR_ACC 0x00000002 /* TLB access */ #define MMUOR_RD 0x00000004 /* TLB access read */ #define MMUOR_WR 0x00000000 /* TLB access write */ diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h index 24bcba496c75..c895b987202c 100644 --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -11,7 +11,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad { pte_t *pte; - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); if (pte) { __flush_page_to_ram(pte); flush_tlb_kernel_page(pte); @@ -32,7 +32,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addres struct page *page; pte_t *pte; - page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + page = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0); if(!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/m68k/include/asm/q40_master.h b/arch/m68k/include/asm/q40_master.h index fc5b36278d04..c48d21b68f04 100644 --- a/arch/m68k/include/asm/q40_master.h +++ b/arch/m68k/include/asm/q40_master.h @@ -1,6 +1,6 @@ /* * Q40 master Chip Control - * RTC stuff merged for compactnes.. + * RTC stuff merged for compactness. 
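
The mcf_pgalloc.h and motorola_pgalloc.h hunks above, like the sun3 and other per-arch hunks that follow, belong to a tree-wide 4.8 cleanup dropping __GFP_REPEAT from order-0 page-table allocations: the flag only influences the allocator for costly orders (order > PAGE_ALLOC_COSTLY_ORDER, which is 3), so on a single-page allocation it was dead weight and removing it changes no behavior. In other words:

	/* order-0, so with or without __GFP_REPEAT these behave the same */
	pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
	pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
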
*/ #ifndef _Q40_MASTER_H diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 0931388de47f..1901f61f926f 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -37,7 +37,7 @@ do { \ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - unsigned long page = __get_free_page(GFP_KERNEL|__GFP_REPEAT); + unsigned long page = __get_free_page(GFP_KERNEL); if (!page) return NULL; @@ -49,7 +49,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + struct page *page = alloc_pages(GFP_KERNEL, 0); if (page == NULL) return NULL; diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index 4d2adfb32a2a..7990b6f50105 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -60,7 +60,7 @@ * * The host talks to the IOPs using a rather simple message-passing scheme via * a shared memory area in the IOP RAM. Each IOP has seven "channels"; each - * channel is conneced to a specific software driver on the IOP. For example + * channel is connected to a specific software driver on the IOP. For example * on the SCC IOP there is one channel for each serial port. Each channel has * an incoming and and outgoing message queue with a depth of one. * diff --git a/arch/m68k/math-emu/fp_decode.h b/arch/m68k/math-emu/fp_decode.h index 759679d9ab96..6d1e760e2a0e 100644 --- a/arch/m68k/math-emu/fp_decode.h +++ b/arch/m68k/math-emu/fp_decode.h @@ -130,7 +130,7 @@ do_fscc=0 bfextu %d2{#13,#3},%d0 .endm -| decode the 8bit diplacement from the brief extension word +| decode the 8bit displacement from the brief extension word .macro fp_decode_disp8 move.b %d2,%d0 ext.w %d0 diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 6a94cdd0c830..bd66a0b20c6b 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -136,7 +136,7 @@ good_area: * the fault. 
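
Each arch fault handler in this series gets the same one-line conversion, shown next for m68k: handle_mm_fault() no longer takes the mm_struct and derives it internally instead, removing a parameter callers had to keep consistent with the VMA. The shape of the change:

	/* old: fault = handle_mm_fault(mm, vma, address, flags);	*/
	/* new (the handler uses vma->vm_mm itself):			*/
	fault = handle_mm_fault(vma, address, flags);
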
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); pr_debug("handle_mm_fault returns %d\n", fault); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h index 88fa25fae8bd..def2c642f053 100644 --- a/arch/metag/include/asm/atomic_lnkget.h +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -69,16 +69,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int result, temp; \ + \ + smp_mb(); \ + \ + asm volatile ( \ + "1: LNKGETD %1, [%2]\n" \ + " " #op " %0, %1, %3\n" \ + " LNKSETD [%2], %0\n" \ + " DEFR %0, TXSTAT\n" \ + " ANDT %0, %0, #HI(0x3f000000)\n" \ + " CMPT %0, #HI(0x02000000)\n" \ + " BNZ 1b\n" \ + : "=&d" (temp), "=&d" (result) \ + : "da" (&v->counter), "bd" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h index 0295d9b8d5bf..6c1380a8a0d4 100644 --- a/arch/metag/include/asm/atomic_lock1.h +++ b/arch/metag/include/asm/atomic_lock1.h @@ -64,15 +64,40 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long result; \ + unsigned long flags; \ + \ + __global_lock1(flags); \ + result = v->counter; \ + fence(); \ + v->counter c_op i; \ + __global_unlock1(flags); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) #undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h index 3104df0a4822..c2caa1ee4360 100644 --- a/arch/metag/include/asm/pgalloc.h +++ b/arch/metag/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | - __GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); return pte; } @@ -51,7 +50,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h index 86a7cf3d1386..c0c7a22be1ae 100644 
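
The metag conversion that follows, like the m32r and mn10300 ones elsewhere in this series, rewrites arch_spin_unlock_wait() in terms of smp_cond_load_acquire(ptr, cond): spin, re-loading *ptr into the magic variable VAL, until cond holds, then return with ACQUIRE ordering. Roughly what the generic macro in asm-generic/barrier.h expands to (sketch; the local VAL stands in for the macro-internal token):

	for (;;) {
		int VAL = READ_ONCE(lock->lock);
		if (!VAL)			/* the cond_expr argument */
			break;
		cpu_relax();
	}
	smp_acquire__after_ctrl_dep();		/* the ACQUIRE upgrade    */
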
--- a/arch/metag/include/asm/spinlock.h +++ b/arch/metag/include/asm/spinlock.h @@ -1,14 +1,24 @@ #ifndef __ASM_SPINLOCK_H #define __ASM_SPINLOCK_H +#include +#include + #ifdef CONFIG_METAG_ATOMICITY_LOCK1 #include #else #include #endif -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +/* + * both lock1 and lnkget are test-and-set spinlocks with 0 unlocked and 1 + * locked. + */ + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index f57edca63609..372783a67dda 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -133,7 +133,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return 0; diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 636e0720fb20..86f65721e629 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -3,7 +3,6 @@ config MICROBLAZE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_WANT_IPC_PARSE_VERSION - select ARCH_WANT_OPTIONAL_GPIOLIB select BUILDTIME_EXTABLE_SORT select CLKSRC_OF select CLONE_BACKWARDS3 diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index 61436d69775c..7c89390c0c13 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -116,9 +116,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *ptepage; #ifdef CONFIG_HIGHPTE - int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; + int flags = GFP_KERNEL | __GFP_HIGHMEM; #else - int flags = GFP_KERNEL | __GFP_REPEAT; + int flags = GFP_KERNEL; #endif ptepage = alloc_pages(flags, 0); diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index 67e2ef48d2d0..5bbf38b916ef 100644 --- a/arch/microblaze/kernel/timer.c +++ b/arch/microblaze/kernel/timer.c @@ -170,7 +170,7 @@ static struct irqaction timer_irqaction = { .dev_id = &clockevent_xilinx_timer, }; -static __init void xilinx_clockevent_init(void) +static __init int xilinx_clockevent_init(void) { clockevent_xilinx_timer.mult = div_sc(timer_clock_freq, NSEC_PER_SEC, @@ -181,6 +181,8 @@ static __init void xilinx_clockevent_init(void) clockevent_delta2ns(1, &clockevent_xilinx_timer); clockevent_xilinx_timer.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_xilinx_timer); + + return 0; } static u64 xilinx_clock_read(void) @@ -229,8 +231,14 @@ static struct clocksource clocksource_microblaze = { static int __init xilinx_clocksource_init(void) { - if (clocksource_register_hz(&clocksource_microblaze, timer_clock_freq)) - panic("failed to register clocksource"); + int ret; + + ret = clocksource_register_hz(&clocksource_microblaze, + timer_clock_freq); + if (ret) { + pr_err("failed to register clocksource"); + return ret; + } /* stop timer1 */ write_fn(read_fn(timer_baseaddr + TCSR1) & ~TCSR_ENT, @@ -239,16 +247,16 @@ static int __init xilinx_clocksource_init(void) write_fn(TCSR_TINT|TCSR_ENT|TCSR_ARHT, timer_baseaddr + TCSR1); /* register timecounter - for ftrace support */ - init_xilinx_timecounter(); - return 0; + return init_xilinx_timecounter(); } -static void __init xilinx_timer_init(struct device_node 
*timer) +static int __init xilinx_timer_init(struct device_node *timer) { struct clk *clk; static int initialized; u32 irq; u32 timer_num = 1; + int ret; if (initialized) return; @@ -258,7 +266,7 @@ static void __init xilinx_timer_init(struct device_node *timer) timer_baseaddr = of_iomap(timer, 0); if (!timer_baseaddr) { pr_err("ERROR: invalid timer base address\n"); - BUG(); + return -ENXIO; } write_fn = timer_write32; @@ -271,11 +279,15 @@ static void __init xilinx_timer_init(struct device_node *timer) } irq = irq_of_parse_and_map(timer, 0); + if (irq <= 0) { + pr_err("Failed to parse and map irq"); + return -EINVAL; + } of_property_read_u32(timer, "xlnx,one-timer-only", &timer_num); if (timer_num) { - pr_emerg("Please enable two timers in HW\n"); - BUG(); + pr_err("Please enable two timers in HW\n"); + return -EINVAL; } pr_info("%s: irq=%d\n", timer->full_name, irq); @@ -297,14 +309,27 @@ static void __init xilinx_timer_init(struct device_node *timer) freq_div_hz = timer_clock_freq / HZ; - setup_irq(irq, &timer_irqaction); + ret = setup_irq(irq, &timer_irqaction); + if (ret) { + pr_err("Failed to setup IRQ"); + return ret; + } + #ifdef CONFIG_HEART_BEAT microblaze_setup_heartbeat(); #endif - xilinx_clocksource_init(); - xilinx_clockevent_init(); + + ret = xilinx_clocksource_init(); + if (ret) + return ret; + + ret = xilinx_clockevent_init(); + if (ret) + return ret; sched_clock_register(xilinx_clock_read, 32, timer_clock_freq); + + return 0; } CLOCKSOURCE_OF_DECLARE(xilinx_timer, "xlnx,xps-timer-1.00.a", diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index 177dfc003643..abb678ccde6f 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -216,7 +216,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
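
The xilinx timer rework above follows the 4.8 clocksource-driver conversion: init routines hooked up via CLOCKSOURCE_OF_DECLARE() now return int and propagate failures (unmappable registers, bad IRQ, registration errors) instead of calling panic() or BUG(), so a broken timer node degrades the boot rather than halting it. The expected shape, as a sketch with a hypothetical compatible string:

	#include <linux/clocksource.h>
	#include <linux/of.h>
	#include <linux/of_address.h>

	/* "vendor,example-timer" is an illustrative binding name */
	static int __init example_timer_init(struct device_node *np)
	{
		void __iomem *base = of_iomap(np, 0);

		if (!base) {
			pr_err("%s: unable to map registers\n", np->name);
			return -ENXIO;		/* was: panic() */
		}
		/* ... program the hardware and register the
		 * clocksource/clockevent, returning any error ... */
		return 0;
	}
	CLOCKSOURCE_OF_DECLARE(example, "vendor,example-timer",
			       example_timer_init);
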
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 4f4520e779a5..eb99fcc76088 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -239,8 +239,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; if (mem_init_done) { - pte = (pte_t *)__get_free_page(GFP_KERNEL | - __GFP_REPEAT | __GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } else { pte = (pte_t *)early_get_page(); if (pte) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 835b402e4574..0ab176bdb8e8 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -66,7 +66,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ " " #asm_op " %0, %2 \n" \ " sc %0, %1 \n" \ " .set mips0 \n" \ - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ : "Ir" (i)); \ } while (unlikely(!temp)); \ } else { \ @@ -79,12 +79,10 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ } #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ +static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ { \ int result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ int temp; \ \ @@ -125,23 +123,84 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ + return result; \ +} + +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ +{ \ + int result; \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + int temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " move %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + int temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set "MIPS_ISA_LEVEL" \n" \ + " ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } while (unlikely(!result)); \ + \ + result = temp; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ \ return result; \ } #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, addu) ATOMIC_OPS(sub, -=, subu) -ATOMIC_OP(and, &=, and) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, xor) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) + 
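
The MIPS rewrite that continues below drops the explicit smp_mb__before_llsc()/smp_llsc_mb() pair from the value-returning ops and defines only _relaxed variants; the generic include/linux/atomic.h layer then manufactures the fully ordered atomic_add_return() and friends by bracketing the relaxed op with barriers. Simplified from that header:

	#define __atomic_op_fence(op, args...)				\
	({								\
		typeof(op##_relaxed(args)) __ret;			\
		smp_mb__before_atomic();				\
		__ret = op##_relaxed(args);				\
		smp_mb__after_atomic();					\
		__ret;							\
	})
	/* e.g. atomic_add_return(i, v) becomes
	 *	__atomic_op_fence(atomic_add_return, i, v)
	 * whenever only atomic_add_return_relaxed is provided. */
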
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -362,12 +421,10 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } #define ATOMIC64_OP_RETURN(op, c_op, asm_op) \ -static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ { \ long result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ long temp; \ \ @@ -409,22 +466,85 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ + return result; \ +} + +#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ +{ \ + long result; \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + long temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: lld %1, %2 # atomic64_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " scd %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " move %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + long temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set "MIPS_ISA_LEVEL" \n" \ + " lld %1, %2 # atomic64_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " scd %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "=" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter) \ + : "memory"); \ + } while (unlikely(!result)); \ + \ + result = temp; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ \ return result; \ } #define ATOMIC64_OPS(op, c_op, asm_op) \ ATOMIC64_OP(op, c_op, asm_op) \ - ATOMIC64_OP_RETURN(op, c_op, asm_op) + ATOMIC64_OP_RETURN(op, c_op, asm_op) \ + ATOMIC64_FETCH_OP(op, c_op, asm_op) ATOMIC64_OPS(add, +=, daddu) ATOMIC64_OPS(sub, -=, dsubu) -ATOMIC64_OP(and, &=, and) -ATOMIC64_OP(or, |=, or) -ATOMIC64_OP(xor, ^=, xor) + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, c_op, asm_op) \ + ATOMIC64_OP(op, c_op, asm_op) \ + ATOMIC64_FETCH_OP(op, c_op, asm_op) + +ATOMIC64_OPS(and, &=, and) +ATOMIC64_OPS(or, |=, or) +ATOMIC64_OPS(xor, ^=, xor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index b336037e8768..93c079a1cfc8 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -69,7 +69,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -79,7 +79,7 @@ 
static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (!pte) return NULL; clear_highpage(pte); @@ -113,7 +113,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { pmd_t *pmd; - pmd = (pmd_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PMD_ORDER); + pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ORDER); if (pmd) pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); return pmd; diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index a6b611f1da43..7d44e888134f 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -24,7 +24,7 @@ struct mm_struct; struct vm_area_struct; #define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \ - _CACHE_CACHABLE_NONCOHERENT) + _page_cachable_default) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \ _page_cachable_default) #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \ @@ -476,7 +476,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) pte.pte_low &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK); pte.pte_high &= (_PFN_MASK | _CACHE_MASK); pte.pte_low |= pgprot_val(newprot) & ~_PFNX_MASK; - pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK; + pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK); return pte; } #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) @@ -491,7 +491,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #else static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); } #endif @@ -632,7 +633,8 @@ static inline struct page *pmd_page(pmd_t pmd) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | pgprot_val(newprot); + pmd_val(pmd) = (pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HUGE)) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK); return pmd; } diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 40196bebe849..f485afe51514 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -48,8 +49,22 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) } #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - while (arch_spin_is_locked(x)) { cpu_relax(); } + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u16 owner = READ_ONCE(lock->h.serving_now); + smp_rmb(); + for (;;) { + arch_spinlock_t tmp = READ_ONCE(*lock); + + if (tmp.h.serving_now == tmp.h.ticket || + tmp.h.serving_now != owner) + break; + + cpu_relax(); + } + smp_acquire__after_ctrl_dep(); +} static inline int arch_spin_is_contended(arch_spinlock_t *lock) { diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 4b88fa031891..9560ad731120 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -153,7 +153,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
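
A note on the MIPS arch_spin_unlock_wait() above, since the exit condition is easy to misread:

	/* After sampling the owner (serving_now) once:
	 *
	 *   tmp.h.serving_now == tmp.h.ticket - nobody holds the lock at all;
	 *   tmp.h.serving_now != owner        - whoever held it when we first
	 *                                       looked has since released it.
	 *
	 * Either way, the critical section that was in flight when we were
	 * called has completed, which is all unlock_wait() guarantees; the
	 * final smp_acquire__after_ctrl_dep() orders the caller against
	 * that observation. */
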
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c index 3ad0b0794f7d..f24eee04e16a 100644 --- a/arch/mips/ralink/cevt-rt3352.c +++ b/arch/mips/ralink/cevt-rt3352.c @@ -117,11 +117,13 @@ static int systick_set_oneshot(struct clock_event_device *evt) return 0; } -static void __init ralink_systick_init(struct device_node *np) +static int __init ralink_systick_init(struct device_node *np) { + int ret; + systick.membase = of_iomap(np, 0); if (!systick.membase) - return; + return -ENXIO; systick_irqaction.name = np->name; systick.dev.name = np->name; @@ -131,16 +133,21 @@ static void __init ralink_systick_init(struct device_node *np) systick.dev.irq = irq_of_parse_and_map(np, 0); if (!systick.dev.irq) { pr_err("%s: request_irq failed", np->name); - return; + return -EINVAL; } - clocksource_mmio_init(systick.membase + SYSTICK_COUNT, np->name, - SYSTICK_FREQ, 301, 16, clocksource_mmio_readl_up); + ret = clocksource_mmio_init(systick.membase + SYSTICK_COUNT, np->name, + SYSTICK_FREQ, 301, 16, + clocksource_mmio_readl_up); + if (ret) + return ret; clockevents_register_device(&systick.dev); pr_info("%s: running - mult: %d, shift: %d\n", np->name, systick.dev.mult, systick.dev.shift); + + return 0; } CLOCKSOURCE_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init); diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index ce318d5ab23b..36389efd45e8 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -84,16 +84,41 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return retval; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int retval, status; \ + \ + asm volatile( \ + "1: mov %4,(_AAR,%3) \n" \ + " mov (_ADR,%3),%1 \n" \ + " mov %1,%0 \n" \ + " " #op " %5,%0 \n" \ + " mov %0,(_ADR,%3) \n" \ + " mov (_ADR,%3),%0 \n" /* flush */ \ + " mov (_ASR,%3),%0 \n" \ + " or %0,%0 \n" \ + " bne 1b \n" \ + : "=&r"(status), "=&r"(retval), "=m"(v->counter) \ + : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i) \ + : "memory", "cc"); \ + return retval; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h index 1ae580f38933..9c7b8f7942d8 100644 --- a/arch/mn10300/include/asm/spinlock.h +++ b/arch/mn10300/include/asm/spinlock.h @@ -12,6 +12,8 @@ #define _ASM_SPINLOCK_H #include +#include +#include #include #include @@ -23,7 +25,11 @@ */ #define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0) -#define arch_spin_unlock_wait(x) do { barrier(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} static inline void arch_spin_unlock(arch_spinlock_t *lock) { diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index 4861a78c7160..f5f90bbf019d 100644 --- 
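
The mn10300 hunks that follow are part of the 4.8 rename of the thread-info allocators to thread-stack allocators, groundwork for letting the stack be allocated independently of struct thread_info. The mapping, and the conversion trick while thread_info still sits at the base of the stack:

	/* old name                   -> new name
	 * alloc_thread_info_node()   -> alloc_thread_stack_node()
	 * free_thread_info()         -> free_thread_stack()
	 * arch_release_thread_info() -> arch_release_thread_stack()
	 *
	 * While thread_info lives at the stack base, recovering it is a
	 * cast, exactly as the kgdb hunk below does: */
	struct thread_info *ti = (struct thread_info *)stack;
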
a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -115,7 +115,7 @@ static inline unsigned long current_stack_pointer(void) } #ifndef CONFIG_KGDB -void arch_release_thread_info(struct thread_info *ti); +void arch_release_thread_stack(unsigned long *stack); #endif #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c index 99770823451a..2d7986c386fe 100644 --- a/arch/mn10300/kernel/kgdb.c +++ b/arch/mn10300/kernel/kgdb.c @@ -397,8 +397,9 @@ static bool kgdb_arch_undo_singlestep(struct pt_regs *regs) * single-step state is cleared. At this point the breakpoints should have * been removed by __switch_to(). */ -void arch_release_thread_info(struct thread_info *ti) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *ti = (void *)stack; if (kgdb_sstep_thread == ti) { kgdb_sstep_thread = NULL; diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index 4a1d181ed32f..f23781d6bbb3 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -254,7 +254,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/mn10300/mm/pgtable.c b/arch/mn10300/mm/pgtable.c index e77a7c728081..9577cf768875 100644 --- a/arch/mn10300/mm/pgtable.c +++ b/arch/mn10300/mm/pgtable.c @@ -63,7 +63,7 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL); if (pte) clear_page(pte); return pte; @@ -74,9 +74,9 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *pte; #ifdef CONFIG_HIGHPTE - pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0); #else - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL, 0); #endif if (!pte) return NULL; diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 6e2985e0a7b9..bb47d08c8ef7 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, - PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -53,7 +52,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (pte) { if (!pgtable_page_ctor(pte)) { __free_page(pte); diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index e835dda2bfe2..d9563ddb337e 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -206,15 +206,21 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void __init nios2_timer_get_base_and_freq(struct device_node *np, +static int __init nios2_timer_get_base_and_freq(struct device_node *np, void __iomem **base, u32 *freq) { *base = of_iomap(np, 0); - if (!*base) - panic("Unable to map reg for %s\n", np->name); + if 
(!*base) { + pr_crit("Unable to map reg for %s\n", np->name); + return -ENXIO; + } + + if (of_property_read_u32(np, "clock-frequency", freq)) { + pr_crit("Unable to get %s clock frequency\n", np->name); + return -EINVAL; + } - if (of_property_read_u32(np, "clock-frequency", freq)) - panic("Unable to get %s clock frequency\n", np->name); + return 0; } static struct nios2_clockevent_dev nios2_ce = { @@ -231,17 +237,21 @@ static struct nios2_clockevent_dev nios2_ce = { }, }; -static __init void nios2_clockevent_init(struct device_node *timer) +static __init int nios2_clockevent_init(struct device_node *timer) { void __iomem *iobase; u32 freq; - int irq; + int irq, ret; - nios2_timer_get_base_and_freq(timer, &iobase, &freq); + ret = nios2_timer_get_base_and_freq(timer, &iobase, &freq); + if (ret) + return ret; irq = irq_of_parse_and_map(timer, 0); - if (!irq) - panic("Unable to parse timer irq\n"); + if (!irq) { + pr_crit("Unable to parse timer irq\n"); + return -EINVAL; + } nios2_ce.timer.base = iobase; nios2_ce.timer.freq = freq; @@ -253,25 +263,35 @@ static __init void nios2_clockevent_init(struct device_node *timer) /* clear pending interrupt */ timer_writew(&nios2_ce.timer, 0, ALTERA_TIMER_STATUS_REG); - if (request_irq(irq, timer_interrupt, IRQF_TIMER, timer->name, - &nios2_ce.ced)) - panic("Unable to setup timer irq\n"); + ret = request_irq(irq, timer_interrupt, IRQF_TIMER, timer->name, + &nios2_ce.ced); + if (ret) { + pr_crit("Unable to setup timer irq\n"); + return ret; + } clockevents_config_and_register(&nios2_ce.ced, freq, 1, ULONG_MAX); + + return 0; } -static __init void nios2_clocksource_init(struct device_node *timer) +static __init int nios2_clocksource_init(struct device_node *timer) { unsigned int ctrl; void __iomem *iobase; u32 freq; + int ret; - nios2_timer_get_base_and_freq(timer, &iobase, &freq); + ret = nios2_timer_get_base_and_freq(timer, &iobase, &freq); + if (ret) + return ret; nios2_cs.timer.base = iobase; nios2_cs.timer.freq = freq; - clocksource_register_hz(&nios2_cs.cs, freq); + ret = clocksource_register_hz(&nios2_cs.cs, freq); + if (ret) + return ret; timer_writew(&nios2_cs.timer, USHRT_MAX, ALTERA_TIMER_PERIODL_REG); timer_writew(&nios2_cs.timer, USHRT_MAX, ALTERA_TIMER_PERIODH_REG); @@ -282,6 +302,8 @@ static __init void nios2_clocksource_init(struct device_node *timer) /* Calibrate the delay loop directly */ lpj_fine = freq / HZ; + + return 0; } /* @@ -289,22 +311,25 @@ static __init void nios2_clocksource_init(struct device_node *timer) * more instances, the second one gets used as clocksource and all * others are unused. */ -static void __init nios2_time_init(struct device_node *timer) +static int __init nios2_time_init(struct device_node *timer) { static int num_called; + int ret; switch (num_called) { case 0: - nios2_clockevent_init(timer); + ret = nios2_clockevent_init(timer); break; case 1: - nios2_clocksource_init(timer); + ret = nios2_clocksource_init(timer); break; default: break; } num_called++; + + return ret; } void read_persistent_clock(struct timespec *ts) diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index b51878b0c6b8..affc4eb3f89e 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -131,7 +131,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 142cb057c41b..489e7f909286 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -10,7 +10,7 @@ config OPENRISC select IRQ_DOMAIN select HANDLE_DOMAIN_IRQ select HAVE_MEMBLOCK - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select HAVE_ARCH_TRACEHOOK select GENERIC_IRQ_CHIP select GENERIC_IRQ_PROBE diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 21484e5b9e9a..87eebd185089 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -77,7 +77,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL, 0); if (!pte) return NULL; clear_page(page_address(pte)); diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 230ac20ae794..e94cd225e816 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -163,7 +163,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 62b08ef392be..5b2a95116e8f 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -122,7 +122,7 @@ pte_t __init_refok *pte_alloc_one_kernel(struct mm_struct *mm, pte_t *pte; if (likely(mem_init_done)) { - pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT); + pte = (pte_t *) __get_free_page(GFP_KERNEL); } else { pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); #if 0 diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 1d109990a022..5394b9c5f914 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -121,16 +121,39 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = v->counter; \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -185,15 +208,39 @@ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \ return ret; \ } -#define ATOMIC64_OPS(op, c_op) ATOMIC64_OP(op, c_op) ATOMIC64_OP_RETURN(op, c_op) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ s64 atomic64_fetch_##op(s64 i, atomic64_t *v) \ +{ \ + unsigned long flags; \ + s64 ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = v->counter; \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define 
ATOMIC64_OPS(op, c_op) \
+	ATOMIC64_OP(op, c_op) \
+	ATOMIC64_OP_RETURN(op, c_op) \
+	ATOMIC64_FETCH_OP(op, c_op)
 
 ATOMIC64_OPS(add, +=)
 ATOMIC64_OPS(sub, -=)
-ATOMIC64_OP(and, &=)
-ATOMIC64_OP(or, |=)
-ATOMIC64_OP(xor, ^=)
 
 #undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op) \
+	ATOMIC64_OP(op, c_op) \
+	ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &=)
+ATOMIC64_OPS(or, |=)
+ATOMIC64_OPS(xor, ^=)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index f2fd327dce2e..f08dda3f0995 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -63,8 +63,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT,
-					       PMD_ORDER);
+	pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL, PMD_ORDER);
 	if (pmd)
 		memset(pmd, 0, PAGE_SIZE << PMD_ORDER);
 	return pmd;
 }
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_asm.S b/arch/powerpc/crypto/crc32c-vpmsum_asm.S
new file mode 100644
--- /dev/null
+++ b/arch/powerpc/crypto/crc32c-vpmsum_asm.S
+/*
+ * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+
+	.section	.rodata
+.balign 16
+
+.byteswap_constant:
+	/* byte reverse permute constant */
+	.octa 0x0F0E0D0C0B0A09080706050403020100
+
+#define MAX_SIZE	32768
+.constants:
+
+	/* Reduce 262144 kbits to 1024 bits */
+	/* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
+	.octa 0x00000000b6ca9e20000000009c37c408
+
+	/* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
+	.octa 0x00000000350249a800000001b51df26c
+
+	/* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
+	.octa 0x00000001862dac54000000000724b9d0
+
+	/* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
+	.octa 0x00000001d87fb48c00000001c00532fe
+
+	/* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
+	.octa 0x00000001f39b699e00000000f05a9362
+
+	/* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
+	.octa 0x0000000101da11b400000001e1007970
+
+	/* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
+	.octa 0x00000001cab571e000000000a57366ee
+
+	/* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
+	.octa 0x00000000c7020cfe0000000192011284
+
+	/* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
+	.octa 0x00000000cdaed1ae0000000162716d9a
+
+	/* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
+	.octa 0x00000001e804effc00000000cd97ecde
+
+	/* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
+	.octa 0x0000000077c3ea3a0000000058812bc0
+
+	/* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
+	.octa 0x0000000068df31b40000000088b8c12e
+
+	/* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
+	.octa 0x00000000b059b6c200000001230b234c
+
+	/* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
+	.octa 0x0000000145fb8ed800000001120b416e
+
+	/* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
+	.octa 0x00000000cbc0916800000001974aecb0
+
+	/* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
+	.octa 0x000000005ceeedc2000000008ee3f226
+
+	/* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
+	.octa 0x0000000047d74e8600000001089aba9a
+
+	/* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
+	.octa 0x00000001407e9e220000000065113872
+
+	/* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
+	.octa 0x00000001da967bda000000005c07ec10
+
+	/* x^241664 mod p(x)` << 1, x^241728 mod p(x)`
<< 1 */ + .octa 0x000000006c8983680000000187590924 + + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + .octa 0x00000000f2d14c9800000000e35da7c6 + + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + .octa 0x00000001993c6ad4000000000415855a + + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + .octa 0x000000014683d1ac0000000073617758 + + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + .octa 0x00000001a7c93e6c0000000176021d28 + + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + .octa 0x000000010211e90a00000001c358fd0a + + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + .octa 0x000000001119403e00000001ff7a2c18 + + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + .octa 0x000000001c3261aa00000000f2d9f7e4 + + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + .octa 0x000000014e37a634000000016cf1f9c8 + + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + .octa 0x0000000073786c0c000000010af9279a + + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + .octa 0x000000011dc037f80000000004f101e8 + + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + .octa 0x0000000031433dfc0000000070bcf184 + + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + .octa 0x000000009cde8348000000000a8de642 + + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + .octa 0x0000000038d3c2a60000000062ea130c + + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + .octa 0x000000011b25f26000000001eb31cbb2 + + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + .octa 0x000000001629e6f00000000170783448 + + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + .octa 0x0000000160838b4c00000001a684b4c6 + + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + .octa 0x000000007a44011c00000000253ca5b4 + + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + .octa 0x00000000226f417a0000000057b4b1e2 + + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + .octa 0x0000000045eb2eb400000000b6bd084c + + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + .octa 0x000000014459d70c0000000123c2d592 + + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + .octa 0x00000001d406ed8200000000159dafce + + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + .octa 0x0000000160c8e1a80000000127e1a64e + + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + .octa 0x0000000027ba80980000000056860754 + + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + .octa 0x000000006d92d01800000001e661aae8 + + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + .octa 0x000000012ed7e3f200000000f82c6166 + + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + .octa 0x000000002dc8778800000000c4f9c7ae + + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + .octa 0x0000000018240bb80000000074203d20 + + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + .octa 0x000000001ad381580000000198173052 + + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + .octa 0x00000001396b78f200000001ce8aba54 + + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + .octa 0x000000011a68133400000001850d5d94 + + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + .octa 0x000000012104732e00000001d609239c + + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + .octa 0x00000000a140d90c000000001595f048 + + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + .octa 0x00000001b7215eda0000000042ccee08 + + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + .octa 0x00000001aaf1df3c000000010a389d74 + + /* x^205824 mod p(x)` << 1, 
x^205888 mod p(x)` << 1 */ + .octa 0x0000000029d15b8a000000012a840da6 + + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + .octa 0x00000000f1a96922000000001d181c0c + + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + .octa 0x00000001ac80d03c0000000068b7d1f6 + + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + .octa 0x000000000f11d56a000000005b0f14fc + + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + .octa 0x00000001f1c022a20000000179e9e730 + + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + .octa 0x0000000173d00ae200000001ce1368d6 + + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + .octa 0x00000001d4ffe4ac0000000112c3a84c + + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + .octa 0x000000016edc5ae400000000de940fee + + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + .octa 0x00000001f1a0214000000000fe896b7e + + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + .octa 0x00000000ca0b28a000000001f797431c + + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + .octa 0x00000001928e30a20000000053e989ba + + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + .octa 0x0000000097b1b002000000003920cd16 + + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + .octa 0x00000000b15bf90600000001e6f579b8 + + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + .octa 0x00000000411c5d52000000007493cb0a + + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + .octa 0x00000001c36f330000000001bdd376d8 + + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + .octa 0x00000001119227e0000000016badfee6 + + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + .octa 0x00000000114d47020000000071de5c58 + + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + .octa 0x00000000458b5b9800000000453f317c + + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + .octa 0x000000012e31fb8e0000000121675cce + + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + .octa 0x000000005cf619d800000001f409ee92 + + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + .octa 0x0000000063f4d8b200000000f36b9c88 + + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + .octa 0x000000004138dc8a0000000036b398f4 + + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + .octa 0x00000001d29ee8e000000001748f9adc + + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + .octa 0x000000006a08ace800000001be94ec00 + + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + .octa 0x0000000127d4201000000000b74370d6 + + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + .octa 0x0000000019d76b6200000001174d0b98 + + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + .octa 0x00000001b1471f6e00000000befc06a4 + + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + .octa 0x00000001f64c19cc00000001ae125288 + + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + .octa 0x00000000003c0ea00000000095c19b34 + + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + .octa 0x000000014d73abf600000001a78496f2 + + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + .octa 0x00000001620eb84400000001ac5390a0 + + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + .octa 0x0000000147655048000000002a80ed6e + + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + .octa 0x0000000067b5077e00000001fa9b0128 + + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + .octa 0x0000000010ffe20600000001ea94929e + + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + .octa 0x000000000fee8f1e0000000125f4305c + + /* x^169984 
mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + .octa 0x00000001da26fbae00000001471e2002 + + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + .octa 0x00000001b3a8bd880000000132d2253a + + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + .octa 0x00000000e8f3898e00000000f26b3592 + + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + .octa 0x00000000b0d0d28c00000000bc8b67b0 + + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + .octa 0x0000000030f2a798000000013a826ef2 + + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + .octa 0x000000000fba10020000000081482c84 + + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + .octa 0x00000000bdb9bd7200000000e77307c2 + + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + .octa 0x0000000075d3bf5a00000000d4a07ec8 + + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + .octa 0x00000000ef1f98a00000000017102100 + + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + .octa 0x00000000689c760200000000db406486 + + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + .octa 0x000000016d5fa5fe0000000192db7f88 + + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + .octa 0x00000001d0d2b9ca000000018bf67b1e + + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + .octa 0x0000000041e7b470000000007c09163e + + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + .octa 0x00000001cbb6495e000000000adac060 + + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + .octa 0x000000010052a0b000000000bd8316ae + + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + .octa 0x00000001d8effb5c000000019f09ab54 + + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + .octa 0x00000001d969853c0000000125155542 + + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + .octa 0x00000000523ccce2000000018fdb5882 + + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + .octa 0x000000001e2436bc00000000e794b3f4 + + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + .octa 0x00000000ddd1c3a2000000016f9bb022 + + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + .octa 0x0000000019fcfe3800000000290c9978 + + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + .octa 0x00000001ce95db640000000083c0f350 + + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + .octa 0x00000000af5828060000000173ea6628 + + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + .octa 0x00000001006388f600000001c8b4e00a + + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + .octa 0x0000000179eca00a00000000de95d6aa + + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + .octa 0x0000000122410a6a000000010b7f7248 + + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + .octa 0x000000004288e87c00000001326e3a06 + + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + .octa 0x000000016c5490da00000000bb62c2e6 + + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + .octa 0x00000000d1c71f6e0000000156a4b2c2 + + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + .octa 0x00000001b4ce08a6000000011dfe763a + + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + .octa 0x00000001466ba60c000000007bcca8e2 + + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + .octa 0x00000001f6c488a40000000186118faa + + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + .octa 0x000000013bfb06820000000111a65a88 + + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + .octa 0x00000000690e9e54000000003565e1c4 + + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + .octa 0x00000000281346b6000000012ed02a82 
+ + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + .octa 0x000000015646402400000000c486ecfc + + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + .octa 0x000000016063a8dc0000000001b951b2 + + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + .octa 0x0000000116a663620000000048143916 + + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + .octa 0x000000017e8aa4d200000001dc2ae124 + + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + .octa 0x00000001728eb10c00000001416c58d6 + + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + .octa 0x00000001b08fd7fa00000000a479744a + + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + .octa 0x00000001092a16e80000000096ca3a26 + + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + .octa 0x00000000a505637c00000000ff223d4e + + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + .octa 0x00000000d94869b2000000010e84da42 + + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + .octa 0x00000001c8b203ae00000001b61ba3d0 + + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + .octa 0x000000005704aea000000000680f2de8 + + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + .octa 0x000000012e295fa2000000008772a9a8 + + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + .octa 0x000000011d0908bc0000000155f295bc + + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + .octa 0x0000000193ed97ea00000000595f9282 + + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + .octa 0x000000013a0f1c520000000164b1c25a + + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + .octa 0x000000010c2c40c000000000fbd67c50 + + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + .octa 0x00000000ff6fac3e0000000096076268 + + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + .octa 0x000000017b3609c000000001d288e4cc + + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + .octa 0x0000000088c8c92200000001eaac1bdc + + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + .octa 0x00000001751baae600000001f1ea39e2 + + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + .octa 0x000000010795297200000001eb6506fc + + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + .octa 0x0000000162b00abe000000010f806ffe + + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + .octa 0x000000000d7b404c000000010408481e + + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + .octa 0x00000000763b13d40000000188260534 + + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + .octa 0x00000000f6dc22d80000000058fc73e0 + + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + .octa 0x000000007daae06000000000391c59b8 + + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + .octa 0x000000013359ab7c000000018b638400 + + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + .octa 0x000000008add438a000000011738f5c4 + + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + .octa 0x00000001edbefdea000000008cf7c6da + + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + .octa 0x000000004104e0f800000001ef97fb16 + + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + .octa 0x00000000b48a82220000000102130e20 + + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + .octa 0x00000001bcb4684400000000db968898 + + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + .octa 0x000000013293ce0a00000000b5047b5e + + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + .octa 0x00000001710d0844000000010b90fdb2 + + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + .octa 
0x0000000117907f6e000000004834a32e + + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + .octa 0x0000000087ddf93e0000000059c8f2b0 + + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + .octa 0x000000005970e9b00000000122cec508 + + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + .octa 0x0000000185b2b7d0000000000a330cda + + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + .octa 0x00000001dcee0efc000000014a47148c + + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + .octa 0x0000000030da27220000000042c61cb8 + + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + .octa 0x000000012f925a180000000012fe6960 + + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + .octa 0x00000000dd2e357c00000000dbda2c20 + + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + .octa 0x00000000071c80de000000011122410c + + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + .octa 0x000000011513140a00000000977b2070 + + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + .octa 0x00000001df876e8e000000014050438e + + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + .octa 0x000000015f81d6ce0000000147c840e8 + + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + .octa 0x000000019dd94dbe00000001cc7c88ce + + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + .octa 0x00000001373d206e00000001476b35a4 + + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + .octa 0x00000000668ccade000000013d52d508 + + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + .octa 0x00000001b192d268000000008e4be32e + + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + .octa 0x00000000e30f3a7800000000024120fe + + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + .octa 0x000000010ef1f7bc00000000ddecddb4 + + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + .octa 0x00000001f5ac738000000000d4d403bc + + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + .octa 0x000000011822ea7000000001734b89aa + + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + .octa 0x00000000c3a33848000000010e7a58d6 + + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + .octa 0x00000001bd151c2400000001f9f04e9c + + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + .octa 0x0000000056002d7600000000b692225e + + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + .octa 0x000000014657c4f4000000019b8d3f3e + + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + .octa 0x0000000113742d7c00000001a874f11e + + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + .octa 0x000000019c5920ba000000010d5a4254 + + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + .octa 0x000000005216d2d600000000bbb2f5d6 + + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + .octa 0x0000000136f5ad8a0000000179cc0e36 + + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + .octa 0x000000018b07beb600000001dca1da4a + + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + .octa 0x00000000db1e93b000000000feb1a192 + + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + .octa 0x000000000b96fa3a00000000d1eeedd6 + + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + .octa 0x00000001d9968af0000000008fad9bb4 + + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + .octa 0x000000000e4a77a200000001884938e4 + + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + .octa 0x00000000508c2ac800000001bc2e9bc0 + + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + .octa 0x0000000021572a8000000001f9658a68 + + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + .octa 0x00000001b859daf2000000001b9224fc + + /* x^62464 mod 
p(x)` << 1, x^62528 mod p(x)` << 1 */ + .octa 0x000000016f7884740000000055b2fb84 + + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + .octa 0x00000001b438810e000000018b090348 + + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + .octa 0x0000000095ddc6f2000000011ccbd5ea + + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + .octa 0x00000001d977c20c0000000007ae47f8 + + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + .octa 0x00000000ebedb99a0000000172acbec0 + + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + .octa 0x00000001df9e9e9200000001c6e3ff20 + + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + .octa 0x00000001a4a3f95200000000e1b38744 + + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + .octa 0x00000000e2f5122000000000791585b2 + + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + .octa 0x000000004aa01f3e00000000ac53b894 + + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + .octa 0x00000000b3e90a5800000001ed5f2cf4 + + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + .octa 0x000000000c9ca2aa00000001df48b2e0 + + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + .octa 0x000000015168231600000000049c1c62 + + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + .octa 0x0000000036fce78c000000017c460c12 + + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + .octa 0x000000009037dc10000000015be4da7e + + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + .octa 0x00000000d3298582000000010f38f668 + + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + .octa 0x00000001b42e8ad60000000039f40a00 + + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + .octa 0x00000000142a983800000000bd4c10c4 + + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + .octa 0x0000000109c7f1900000000042db1d98 + + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + .octa 0x0000000056ff931000000001c905bae6 + + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + .octa 0x00000001594513aa00000000069d40ea + + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + .octa 0x00000001e3b5b1e8000000008e4fbad0 + + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + .octa 0x000000011dd5fc080000000047bedd46 + + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + .octa 0x00000001675f0cc20000000026396bf8 + + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + .octa 0x00000000d1c8dd4400000000379beb92 + + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + .octa 0x0000000115ebd3d8000000000abae54a + + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + .octa 0x00000001ecbd0dac0000000007e6a128 + + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + .octa 0x00000000cdf67af2000000000ade29d2 + + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + .octa 0x000000004c01ff4c00000000f974c45c + + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + .octa 0x00000000f2d8657e00000000e77ac60a + + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + .octa 0x000000006bae74c40000000145895816 + + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + .octa 0x0000000152af8aa00000000038e362be + + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + .octa 0x0000000004663802000000007f991a64 + + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + .octa 0x00000001ab2f5afc00000000fa366d3a + + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + .octa 0x0000000074a4ebd400000001a2bb34f0 + + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + .octa 0x00000001d7ab3a4c0000000028a9981e + + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + .octa 
0x00000001a8da60c600000001dbc672be + + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + .octa 0x000000013cf6382000000000b04d77f6 + + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + .octa 0x00000000bec12e1e0000000124400d96 + + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + .octa 0x00000001c6368010000000014ca4b414 + + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + .octa 0x00000001e6e78758000000012fe2c938 + + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + .octa 0x000000008d7f2b3c00000001faed01e6 + + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + .octa 0x000000016b4a156e000000007e80ecfe + + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + .octa 0x00000001c63cfeb60000000098daee94 + + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + .octa 0x000000015f902670000000010a04edea + + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + .octa 0x00000001cd5de11e00000001c00b4524 + + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + .octa 0x000000001acaec540000000170296550 + + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + .octa 0x000000002bd0ca780000000181afaa48 + + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + .octa 0x0000000032d63d5c0000000185a31ffa + + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + .octa 0x000000001c6d4e4c000000002469f608 + + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + .octa 0x0000000106a60b92000000006980102a + + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + .octa 0x00000000d3855e120000000111ea9ca8 + + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + .octa 0x00000000e312563600000001bd1d29ce + + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + .octa 0x000000009e8f7ea400000001b34b9580 + + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + .octa 0x00000001c82e562c000000003076054e + + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + .octa 0x00000000ca9f09ce000000012a608ea4 + + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + .octa 0x00000000c63764e600000000784d05fe + + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + .octa 0x0000000168d2e49e000000016ef0d82a + + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + .octa 0x00000000e986c1480000000075bda454 + + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + .octa 0x00000000cfb65894000000003dc0a1c4 + + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + .octa 0x0000000111cadee400000000e9a5d8be + + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + .octa 0x0000000171fb63ce00000001609bc4b4 + +.short_constants: + + /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */ + .octa 0x7fec2963e5bf80485cf015c388e56f72 + + /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */ + .octa 0x38e888d4844752a9963a18920246e2e6 + + /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */ + .octa 0x42316c00730206ad419a441956993a31 + + /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */ + .octa 0x543d5c543e65ddf9924752ba2b830011 + + /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */ + .octa 0x78e87aaf56767c9255bd7f9518e4a304 + + /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */ + .octa 0x8f68fcec1903da7f6d76739fe0553f1e + + /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */ + .octa 0x3f4840246791d588c133722b1fe0b5c3 + + /* x^1056 mod p(x)`, x^1088 mod p(x)`, 
x^1120 mod p(x)`, x^1152 mod p(x)` */ + .octa 0x34c96751b04de25a64b67ee0e55ef1f3 + + /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */ + .octa 0x156c8e180b4a395b069db049b8fdb1e7 + + /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */ + .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e + + /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */ + .octa 0x041d37768cd75659817cdc5119b29a35 + + /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */ + .octa 0x3a0777818cfaa9651ce9d94b36c41f1c + + /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */ + .octa 0x0e148e8252377a554f256efcb82be955 + + /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */ + .octa 0x9c25531d19e65ddeec1631edb2dea967 + + /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */ + .octa 0x790606ff9957c0a65d27e147510ac59a + + /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */ + .octa 0x82f63b786ea2d55ca66805eb18b8ea18 + + +.barrett_constants: + /* 33 bit reflected Barrett constant m - (4^32)/n */ + .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */ + /* 33 bit reflected Barrett constant n */ + .octa 0x00000000000000000000000105ec76f1 + + .text + +#if defined(__BIG_ENDIAN__) +#define BYTESWAP_DATA +#else +#undef BYTESWAP_DATA +#endif + +#define off16 r25 +#define off32 r26 +#define off48 r27 +#define off64 r28 +#define off80 r29 +#define off96 r30 +#define off112 r31 + +#define const1 v24 +#define const2 v25 + +#define byteswap v26 +#define mask_32bit v27 +#define mask_64bit v28 +#define zeroes v29 + +#ifdef BYTESWAP_DATA +#define VPERM(A, B, C, D) vperm A, B, C, D +#else +#define VPERM(A, B, C, D) +#endif + +/* unsigned int __crc32c_vpmsum(unsigned int crc, void *p, unsigned long len) */ +FUNC_START(__crc32c_vpmsum) + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + li off16,16 + li off32,32 + li off48,48 + li off64,64 + li off80,80 + li off96,96 + li off112,112 + li r0,0 + + /* Enough room for saving 10 non volatile VMX registers */ + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + stvx v20,0,r6 + stvx v21,off16,r6 + stvx v22,off32,r6 + stvx v23,off48,r6 + stvx v24,off64,r6 + stvx v25,off80,r6 + stvx v26,off96,r6 + stvx v27,off112,r6 + stvx v28,0,r7 + stvx v29,off16,r7 + + mr r10,r3 + + vxor zeroes,zeroes,zeroes + vspltisw v0,-1 + + vsldoi mask_32bit,zeroes,v0,4 + vsldoi mask_64bit,zeroes,v0,8 + + /* Get the initial value into v8 */ + vxor v8,v8,v8 + MTVRD(v8, R3) + vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */ + +#ifdef BYTESWAP_DATA + addis r3,r2,.byteswap_constant@toc@ha + addi r3,r3,.byteswap_constant@toc@l + + lvx byteswap,0,r3 + addi r3,r3,16 +#endif + + cmpdi r5,256 + blt .Lshort + + rldicr r6,r5,0,56 + + /* Checksum in blocks of MAX_SIZE */ +1: lis r7,MAX_SIZE@h + ori r7,r7,MAX_SIZE@l + mr r9,r7 + cmpd r6,r7 + bgt 2f + mr r7,r6 +2: subf r6,r7,r6 + + /* our main loop does 128 bytes at a time */ + srdi r7,r7,7 + + /* + * Work out the offset into the constants table to start at. 
Each + * constant is 16 bytes, and it is used against 128 bytes of input + * data - 128 / 16 = 8 + */ + sldi r8,r7,4 + srdi r9,r9,3 + subf r8,r8,r9 + + /* We reduce our final 128 bytes in a separate step */ + addi r7,r7,-1 + mtctr r7 + + addis r3,r2,.constants@toc@ha + addi r3,r3,.constants@toc@l + + /* Find the start of our constants */ + add r3,r3,r8 + + /* zero v0-v7 which will contain our checksums */ + vxor v0,v0,v0 + vxor v1,v1,v1 + vxor v2,v2,v2 + vxor v3,v3,v3 + vxor v4,v4,v4 + vxor v5,v5,v5 + vxor v6,v6,v6 + vxor v7,v7,v7 + + lvx const1,0,r3 + + /* + * If we are looping back to consume more data we use the values + * already in v16-v23. + */ + cmpdi r0,1 + beq 2f + + /* First warm up pass */ + lvx v16,0,r4 + lvx v17,off16,r4 + VPERM(v16,v16,v16,byteswap) + VPERM(v17,v17,v17,byteswap) + lvx v18,off32,r4 + lvx v19,off48,r4 + VPERM(v18,v18,v18,byteswap) + VPERM(v19,v19,v19,byteswap) + lvx v20,off64,r4 + lvx v21,off80,r4 + VPERM(v20,v20,v20,byteswap) + VPERM(v21,v21,v21,byteswap) + lvx v22,off96,r4 + lvx v23,off112,r4 + VPERM(v22,v22,v22,byteswap) + VPERM(v23,v23,v23,byteswap) + addi r4,r4,8*16 + + /* xor in initial value */ + vxor v16,v16,v8 + +2: bdz .Lfirst_warm_up_done + + addi r3,r3,16 + lvx const2,0,r3 + + /* Second warm up pass */ + VPMSUMD(v8,v16,const1) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + VPMSUMD(v9,v17,const1) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + VPMSUMD(v10,v18,const1) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + VPMSUMD(v11,v19,const1) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + ori r2,r2,0 + + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdz .Lfirst_cool_down + + /* + * main loop. We modulo schedule it such that it takes three iterations + * to complete - first iteration load, second iteration vpmsum, third + * iteration xor. 
+ */ + .balign 16 +4: lvx const1,0,r3 + addi r3,r3,16 + ori r2,r2,0 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const2) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const2) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const2) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const2) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + lvx const2,0,r3 + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdnz 4b + +.Lfirst_cool_down: + /* First cool down pass */ + lvx const1,0,r3 + addi r3,r3,16 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const1) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const1) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const1) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const1) + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + ori r2,r2,0 + +.Lsecond_cool_down: + /* Second cool down pass */ + vxor v0,v0,v8 + vxor v1,v1,v9 + vxor v2,v2,v10 + vxor v3,v3,v11 + vxor v4,v4,v12 + vxor v5,v5,v13 + vxor v6,v6,v14 + vxor v7,v7,v15 + + /* + * vpmsumd produces a 96 bit result in the least significant bits + * of the register. Since we are bit reflected we have to shift it + * left 32 bits so it occupies the least significant bits in the + * bit reflected domain. + */ + vsldoi v0,v0,zeroes,4 + vsldoi v1,v1,zeroes,4 + vsldoi v2,v2,zeroes,4 + vsldoi v3,v3,zeroes,4 + vsldoi v4,v4,zeroes,4 + vsldoi v5,v5,zeroes,4 + vsldoi v6,v6,zeroes,4 + vsldoi v7,v7,zeroes,4 + + /* xor with last 1024 bits */ + lvx v8,0,r4 + lvx v9,off16,r4 + VPERM(v8,v8,v8,byteswap) + VPERM(v9,v9,v9,byteswap) + lvx v10,off32,r4 + lvx v11,off48,r4 + VPERM(v10,v10,v10,byteswap) + VPERM(v11,v11,v11,byteswap) + lvx v12,off64,r4 + lvx v13,off80,r4 + VPERM(v12,v12,v12,byteswap) + VPERM(v13,v13,v13,byteswap) + lvx v14,off96,r4 + lvx v15,off112,r4 + VPERM(v14,v14,v14,byteswap) + VPERM(v15,v15,v15,byteswap) + + addi r4,r4,8*16 + + vxor v16,v0,v8 + vxor v17,v1,v9 + vxor v18,v2,v10 + vxor v19,v3,v11 + vxor v20,v4,v12 + vxor v21,v5,v13 + vxor v22,v6,v14 + vxor v23,v7,v15 + + li r0,1 + cmpdi r6,0 + addi r6,r6,128 + bne 1b + + /* Work out how many bytes we have left */ + andi. 
r5,r5,127 + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,128 + add r3,r3,r6 + + /* How many 16 byte chunks are in the tail */ + srdi r7,r5,4 + mtctr r7 + + /* + * Reduce the previously calculated 1024 bits to 64 bits, shifting + * 32 bits to include the trailing 32 bits of zeros + */ + lvx v0,0,r3 + lvx v1,off16,r3 + lvx v2,off32,r3 + lvx v3,off48,r3 + lvx v4,off64,r3 + lvx v5,off80,r3 + lvx v6,off96,r3 + lvx v7,off112,r3 + addi r3,r3,8*16 + + VPMSUMW(v0,v16,v0) + VPMSUMW(v1,v17,v1) + VPMSUMW(v2,v18,v2) + VPMSUMW(v3,v19,v3) + VPMSUMW(v4,v20,v4) + VPMSUMW(v5,v21,v5) + VPMSUMW(v6,v22,v6) + VPMSUMW(v7,v23,v7) + + /* Now reduce the tail (0 - 112 bytes) */ + cmpdi r7,0 + beq 1f + + lvx v16,0,r4 + lvx v17,0,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off16,r4 + lvx v17,off16,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off32,r4 + lvx v17,off32,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off48,r4 + lvx v17,off48,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off64,r4 + lvx v17,off64,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off80,r4 + lvx v17,off80,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off96,r4 + lvx v17,off96,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + + /* Now xor all the parallel chunks together */ +1: vxor v0,v0,v1 + vxor v2,v2,v3 + vxor v4,v4,v5 + vxor v6,v6,v7 + + vxor v0,v0,v2 + vxor v4,v4,v6 + + vxor v0,v0,v4 + +.Lbarrett_reduction: + /* Barrett constants */ + addis r3,r2,.barrett_constants@toc@ha + addi r3,r3,.barrett_constants@toc@l + + lvx const1,0,r3 + lvx const2,off16,r3 + + vsldoi v1,v0,v0,8 + vxor v0,v0,v1 /* xor two 64 bit results together */ + + /* shift left one bit */ + vspltisb v1,1 + vsl v0,v0,v1 + + vand v0,v0,mask_64bit + + /* + * The reflected version of Barrett reduction. Instead of bit + * reflecting our data (which is expensive to do), we bit reflect our + * constants and our algorithm, which means the intermediate data in + * our vector registers goes from 0-63 instead of 63-0. We can reflect + * the algorithm because we don't carry in mod 2 arithmetic. + */ + vand v1,v0,mask_32bit /* bottom 32 bits of a */ + VPMSUMD(v1,v1,const1) /* ma */ + vand v1,v1,mask_32bit /* bottom 32bits of ma */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Since we are bit reflected, the result (ie the low 32 bits) is in + * the high 32 bits. 
We just need to shift it left 4 bytes + * V0 [ 0 1 X 3 ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ + + /* Get it into r3 */ + MFVRD(R3, v0) + +.Lout: + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + lvx v20,0,r6 + lvx v21,off16,r6 + lvx v22,off32,r6 + lvx v23,off48,r6 + lvx v24,off64,r6 + lvx v25,off80,r6 + lvx v26,off96,r6 + lvx v27,off112,r6 + lvx v28,0,r7 + lvx v29,off16,r7 + + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + ld r26,-48(r1) + ld r25,-56(r1) + + blr + +.Lfirst_warm_up_done: + lvx const1,0,r3 + addi r3,r3,16 + + VPMSUMD(v8,v16,const1) + VPMSUMD(v9,v17,const1) + VPMSUMD(v10,v18,const1) + VPMSUMD(v11,v19,const1) + VPMSUMD(v12,v20,const1) + VPMSUMD(v13,v21,const1) + VPMSUMD(v14,v22,const1) + VPMSUMD(v15,v23,const1) + + b .Lsecond_cool_down + +.Lshort: + cmpdi r5,0 + beq .Lzero + + addis r3,r2,.short_constants@toc@ha + addi r3,r3,.short_constants@toc@l + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,256 + add r3,r3,r6 + + /* How many 16 byte chunks? */ + srdi r7,r5,4 + mtctr r7 + + vxor v19,v19,v19 + vxor v20,v20,v20 + + lvx v0,0,r4 + lvx v16,0,r3 + VPERM(v0,v0,v16,byteswap) + vxor v0,v0,v8 /* xor in initial value */ + VPMSUMW(v0,v0,v16) + bdz .Lv0 + + lvx v1,off16,r4 + lvx v17,off16,r3 + VPERM(v1,v1,v17,byteswap) + VPMSUMW(v1,v1,v17) + bdz .Lv1 + + lvx v2,off32,r4 + lvx v16,off32,r3 + VPERM(v2,v2,v16,byteswap) + VPMSUMW(v2,v2,v16) + bdz .Lv2 + + lvx v3,off48,r4 + lvx v17,off48,r3 + VPERM(v3,v3,v17,byteswap) + VPMSUMW(v3,v3,v17) + bdz .Lv3 + + lvx v4,off64,r4 + lvx v16,off64,r3 + VPERM(v4,v4,v16,byteswap) + VPMSUMW(v4,v4,v16) + bdz .Lv4 + + lvx v5,off80,r4 + lvx v17,off80,r3 + VPERM(v5,v5,v17,byteswap) + VPMSUMW(v5,v5,v17) + bdz .Lv5 + + lvx v6,off96,r4 + lvx v16,off96,r3 + VPERM(v6,v6,v16,byteswap) + VPMSUMW(v6,v6,v16) + bdz .Lv6 + + lvx v7,off112,r4 + lvx v17,off112,r3 + VPERM(v7,v7,v17,byteswap) + VPMSUMW(v7,v7,v17) + bdz .Lv7 + + addi r3,r3,128 + addi r4,r4,128 + + lvx v8,0,r4 + lvx v16,0,r3 + VPERM(v8,v8,v16,byteswap) + VPMSUMW(v8,v8,v16) + bdz .Lv8 + + lvx v9,off16,r4 + lvx v17,off16,r3 + VPERM(v9,v9,v17,byteswap) + VPMSUMW(v9,v9,v17) + bdz .Lv9 + + lvx v10,off32,r4 + lvx v16,off32,r3 + VPERM(v10,v10,v16,byteswap) + VPMSUMW(v10,v10,v16) + bdz .Lv10 + + lvx v11,off48,r4 + lvx v17,off48,r3 + VPERM(v11,v11,v17,byteswap) + VPMSUMW(v11,v11,v17) + bdz .Lv11 + + lvx v12,off64,r4 + lvx v16,off64,r3 + VPERM(v12,v12,v16,byteswap) + VPMSUMW(v12,v12,v16) + bdz .Lv12 + + lvx v13,off80,r4 + lvx v17,off80,r3 + VPERM(v13,v13,v17,byteswap) + VPMSUMW(v13,v13,v17) + bdz .Lv13 + + lvx v14,off96,r4 + lvx v16,off96,r3 + VPERM(v14,v14,v16,byteswap) + VPMSUMW(v14,v14,v16) + bdz .Lv14 + + lvx v15,off112,r4 + lvx v17,off112,r3 + VPERM(v15,v15,v17,byteswap) + VPMSUMW(v15,v15,v17) + +.Lv15: vxor v19,v19,v15 +.Lv14: vxor v20,v20,v14 +.Lv13: vxor v19,v19,v13 +.Lv12: vxor v20,v20,v12 +.Lv11: vxor v19,v19,v11 +.Lv10: vxor v20,v20,v10 +.Lv9: vxor v19,v19,v9 +.Lv8: vxor v20,v20,v8 +.Lv7: vxor v19,v19,v7 +.Lv6: vxor v20,v20,v6 +.Lv5: vxor v19,v19,v5 +.Lv4: vxor v20,v20,v4 +.Lv3: vxor v19,v19,v3 +.Lv2: vxor v20,v20,v2 +.Lv1: vxor v19,v19,v1 +.Lv0: vxor v20,v20,v0 + + vxor v0,v19,v20 + + b .Lbarrett_reduction + +.Lzero: + mr r3,r10 + b .Lout + +FUNC_END(__crc32_vpmsum) diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c new file mode 100644 index 000000000000..bfe3d37a24ef --- /dev/null +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -0,0 +1,167 @@ 
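/*
 * An illustrative sketch, not part of this merge: the reflected Barrett
 * reduction performed at .Lbarrett_reduction in the assembler above,
 * restated as scalar C. clmul() stands in for vpmsumd, the constants are
 * the two .barrett_constants entries, and barrett_fold() is a hypothetical
 * helper name; this mirrors the data flow of the assembler, nothing more.
 */
#include <stdint.h>

/* carry-less (GF(2)) multiply, low 64 bits of the product */
static uint64_t clmul(uint64_t a, uint64_t b)
{
	uint64_t r = 0;

	while (b) {
		if (b & 1)
			r ^= a;
		a <<= 1;
		b >>= 1;
	}
	return r;
}

/* reduce a folded 64-bit reflected remainder to the final 32-bit CRC */
static uint32_t barrett_fold(uint64_t v)
{
	const uint64_t m = 0xdea713f1;		/* x^64 div p(x)`, reflected */
	const uint64_t n = 0x105ec76f1;		/* reflected polynomial p(x)` */
	uint64_t q;

	v <<= 1;				/* the "shift left one bit" step */
	q = clmul(v & 0xffffffff, m);		/* ma */
	q &= 0xffffffff;			/* bottom 32 bits of ma */
	v ^= clmul(q, n);			/* a - qn; subtraction is xor in GF(2) */
	return v >> 32;				/* the result sits in the high word */
}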
+#include <linux/crc32.h>
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <asm/switch_to.h>
+
+#define CHKSUM_BLOCK_SIZE	1
+#define CHKSUM_DIGEST_SIZE	4
+
+#define VMX_ALIGN		16
+#define VMX_ALIGN_MASK		(VMX_ALIGN-1)
+
+#define VECTOR_BREAKPOINT	512
+
+u32 __crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len);
+
+static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len)
+{
+	unsigned int prealign;
+	unsigned int tail;
+
+	if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || in_interrupt())
+		return __crc32c_le(crc, p, len);
+
+	if ((unsigned long)p & VMX_ALIGN_MASK) {
+		prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
+		crc = __crc32c_le(crc, p, prealign);
+		len -= prealign;
+		p += prealign;
+	}
+
+	if (len & ~VMX_ALIGN_MASK) {
+		pagefault_disable();
+		enable_kernel_altivec();
+		crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
+		pagefault_enable();
+	}
+
+	tail = len & VMX_ALIGN_MASK;
+	if (tail) {
+		p += len & ~VMX_ALIGN_MASK;
+		crc = __crc32c_le(crc, p, tail);
+	}
+
+	return crc;
+}
+
+static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm)
+{
+	u32 *key = crypto_tfm_ctx(tfm);
+
+	*key = 0;
+
+	return 0;
+}
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set
+ * the seed.
+ */
+static int crc32c_vpmsum_setkey(struct crypto_shash *hash, const u8 *key,
+				unsigned int keylen)
+{
+	u32 *mctx = crypto_shash_ctx(hash);
+
+	if (keylen != sizeof(u32)) {
+		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	*mctx = le32_to_cpup((__le32 *)key);
+	return 0;
+}
+
+static int crc32c_vpmsum_init(struct shash_desc *desc)
+{
+	u32 *mctx = crypto_shash_ctx(desc->tfm);
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*crcp = *mctx;
+
+	return 0;
+}
+
+static int crc32c_vpmsum_update(struct shash_desc *desc, const u8 *data,
+				unsigned int len)
+{
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*crcp = crc32c_vpmsum(*crcp, data, len);
+
+	return 0;
+}
+
+static int __crc32c_vpmsum_finup(u32 *crcp, const u8 *data, unsigned int len,
+				 u8 *out)
+{
+	*(__le32 *)out = ~cpu_to_le32(crc32c_vpmsum(*crcp, data, len));
+
+	return 0;
+}
+
+static int crc32c_vpmsum_finup(struct shash_desc *desc, const u8 *data,
+			       unsigned int len, u8 *out)
+{
+	return __crc32c_vpmsum_finup(shash_desc_ctx(desc), data, len, out);
+}
+
+static int crc32c_vpmsum_final(struct shash_desc *desc, u8 *out)
+{
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*(__le32 *)out = ~cpu_to_le32p(crcp);
+
+	return 0;
+}
+
+static int crc32c_vpmsum_digest(struct shash_desc *desc, const u8 *data,
+				unsigned int len, u8 *out)
+{
+	return __crc32c_vpmsum_finup(crypto_shash_ctx(desc->tfm), data, len,
+				     out);
+}
+
+static struct shash_alg alg = {
+	.setkey		= crc32c_vpmsum_setkey,
+	.init		= crc32c_vpmsum_init,
+	.update		= crc32c_vpmsum_update,
+	.final		= crc32c_vpmsum_final,
+	.finup		= crc32c_vpmsum_finup,
+	.digest		= crc32c_vpmsum_digest,
+	.descsize	= sizeof(u32),
+	.digestsize	= CHKSUM_DIGEST_SIZE,
+	.base		= {
+		.cra_name		= "crc32c",
+		.cra_driver_name	= "crc32c-vpmsum",
+		.cra_priority		= 200,
+		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(u32),
+		.cra_module		= THIS_MODULE,
+		.cra_init		= crc32c_vpmsum_cra_init,
+	}
+};
+
+static int __init crc32c_vpmsum_mod_init(void)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	return crypto_register_shash(&alg);
+}
+
+static void __exit crc32c_vpmsum_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(crc32c_vpmsum_mod_init);
+module_exit(crc32c_vpmsum_mod_fini);
+
+MODULE_AUTHOR("Anton Blanchard <anton@au.ibm.com>");
+MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructions");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("crc32c");
+MODULE_ALIAS_CRYPTO("crc32c-vpmsum");
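For context, a minimal usage sketch, assuming only the standard shash API and not taken from this merge (crc32c_digest_example is a hypothetical name): once the module registers, an allocation of "crc32c" resolves to crc32c-vpmsum whenever its cra_priority of 200 wins over the other registered implementations.

	#include <crypto/hash.h>
	#include <linux/err.h>

	/* compute the 4-byte (little-endian, inverted) crc32c digest of a buffer */
	static int crc32c_digest_example(const u8 *data, unsigned int len, u8 *out)
	{
		struct crypto_shash *tfm;
		int ret;

		tfm = crypto_alloc_shash("crc32c", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		{
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;
			ret = crypto_shash_digest(desc, data, len, out);
		}

		crypto_free_shash(tfm);
		return ret;
	}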
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index ae0751ef8788..f08d567e0ca4 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -78,21 +78,53 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v)	\
 	return t;							\
 }
 
+#define ATOMIC_FETCH_OP_RELAXED(op, asm_op)				\
+static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v)	\
+{									\
+	int res, t;							\
+									\
+	__asm__ __volatile__(						\
+"1:	lwarx	%0,0,%4		# atomic_fetch_" #op "_relaxed\n"	\
+	#asm_op " %1,%3,%0\n"						\
+	PPC405_ERR77(0, %4)						\
+"	stwcx.	%1,0,%4\n"						\
+"	bne-	1b\n"							\
+	: "=&r" (res), "=&r" (t), "+m" (v->counter)			\
+	: "r" (a), "r" (&v->counter)					\
+	: "cc");							\
+									\
+	return res;							\
+}
+
 #define ATOMIC_OPS(op, asm_op)						\
 	ATOMIC_OP(op, asm_op)						\
-	ATOMIC_OP_RETURN_RELAXED(op, asm_op)
+	ATOMIC_OP_RETURN_RELAXED(op, asm_op)				\
+	ATOMIC_FETCH_OP_RELAXED(op, asm_op)
 
 ATOMIC_OPS(add, add)
 ATOMIC_OPS(sub, subf)
-ATOMIC_OP(and, and)
-ATOMIC_OP(or, or)
-ATOMIC_OP(xor, xor)
-
 #define atomic_add_return_relaxed atomic_add_return_relaxed
 #define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm_op)						\
+	ATOMIC_OP(op, asm_op)						\
+	ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(or, or)
+ATOMIC_OPS(xor, xor)
+
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP_RELAXED
 #undef ATOMIC_OP_RETURN_RELAXED
 #undef ATOMIC_OP
@@ -329,20 +361,53 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v)			\
 	return t;							\
 }
 
+#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op)				\
+static inline long							\
+atomic64_fetch_##op##_relaxed(long a, atomic64_t *v)			\
+{									\
+	long res, t;							\
+									\
+	__asm__ __volatile__(						\
+"1:	ldarx	%0,0,%4		# atomic64_fetch_" #op "_relaxed\n"	\
+	#asm_op " %1,%3,%0\n"						\
+"	stdcx.	%1,0,%4\n"						\
+"	bne-	1b\n"							\
+	: "=&r" (res), "=&r" (t), "+m" (v->counter)			\
+	: "r" (a), "r" (&v->counter)					\
+	: "cc");							\
+									\
+	return res;							\
+}
+
 #define ATOMIC64_OPS(op, asm_op)					\
 	ATOMIC64_OP(op, asm_op)						\
-	ATOMIC64_OP_RETURN_RELAXED(op, asm_op)
+	ATOMIC64_OP_RETURN_RELAXED(op, asm_op)				\
+	ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
 
 ATOMIC64_OPS(add, add)
 ATOMIC64_OPS(sub, subf)
-ATOMIC64_OP(and, and)
-ATOMIC64_OP(or, or)
-ATOMIC64_OP(xor, xor)
 
 #define atomic64_add_return_relaxed atomic64_add_return_relaxed
 #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, asm_op)					\
+	ATOMIC64_OP(op, asm_op)						\
+	ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
+
+ATOMIC64_OPS(and, and)
+ATOMIC64_OPS(or, or)
+ATOMIC64_OPS(xor, xor)
+
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
+
 #undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP_RELAXED
 #undef ATOMIC64_OP_RETURN_RELAXED
 #undef ATOMIC64_OP
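The new fetch variants hand back the value the atomic held before the operation is applied (the res read by lwarx/ldarx above), where the existing _return variants hand back the result. A small illustration, with hypothetical values rather than anything from the patch:

	#include <linux/atomic.h>

	static void fetch_op_demo(void)
	{
		atomic_t v = ATOMIC_INIT(1);
		int old, sum;

		old = atomic_fetch_add_relaxed(2, &v);	 /* old == 1, v is now 3 */
		sum = atomic_add_return_relaxed(2, &v);	 /* sum == 5, v is now 5 */
		old = atomic_fetch_xor_relaxed(0xf, &v); /* old == 5, v is now 0xa */
	}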
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index a2350194fc76..8e21bb492dca 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -102,7 +102,6 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb,
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
 {
-	tlb_flush_pgtable(tlb, address);
 	pgtable_page_dtor(table);
 	pgtable_free_tlb(tlb, page_address(table), 0);
 }
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 290157e8d5b2..74839f24f412 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -88,6 +88,7 @@
 #define HPTE_R_RPN_SHIFT	12
 #define HPTE_R_RPN		ASM_CONST(0x0ffffffffffff000)
 #define HPTE_R_PP		ASM_CONST(0x0000000000000003)
+#define HPTE_R_PPP		ASM_CONST(0x8000000000000003)
 #define HPTE_R_N		ASM_CONST(0x0000000000000004)
 #define HPTE_R_G		ASM_CONST(0x0000000000000008)
 #define HPTE_R_M		ASM_CONST(0x0000000000000010)
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 488279edb1f0..cd5e7aa8cc34 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -41,7 +41,7 @@ extern struct kmem_cache *pgtable_cache[];
 			pgtable_cache[(shift) - 1];	\
 		})
 
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO
 
 extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
 extern void pte_fragment_free(unsigned long *, int);
@@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
 	return (pgd_t *)__get_free_page(PGALLOC_GFP);
 #else
 	struct page *page;
-	page = alloc_pages(PGALLOC_GFP, 4);
+	page = alloc_pages(PGALLOC_GFP | __GFP_REPEAT, 4);
 	if (!page)
 		return NULL;
 	return (pgd_t *) page_address(page);
@@ -93,8 +93,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL);
 }
static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -110,13 +109,17 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long address) { + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE); } static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) @@ -127,6 +130,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX); } @@ -151,7 +159,7 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -198,7 +206,11 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { - tlb_flush_pgtable(tlb, address); + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); pgtable_free_tlb(tlb, table, 0); } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 88a5ecaa157b..ab84c89c9e98 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -230,6 +230,7 @@ extern unsigned long __kernel_virt_size; #define KERN_VIRT_SIZE __kernel_virt_size extern struct page *vmemmap; extern unsigned long ioremap_bot; +extern unsigned long pci_io_base; #endif /* __ASSEMBLY__ */ #include diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 937d4e247ac3..df294224e280 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -228,5 +228,20 @@ extern void radix__vmemmap_remove_mapping(unsigned long start, extern int radix__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, unsigned int psz); + +static inline unsigned long radix__get_tree_size(void) +{ + unsigned long rts_field; + /* + * we support 52 bits, hence 52-31 = 21, 0b10101 + * RTS encoding details + * bits 0 - 3 of rts -> bits 6 - 8 unsigned long + * bits 4 - 5 of rts -> bits 62 - 63 of unsigned long + */ + rts_field = (0x5UL << 5); /* 6 - 8 bits */ + rts_field |= (0x2UL << 61); + + return rts_field; +} #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 13ef38828dfe..3fa94fcac628 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -18,16 +18,19 @@ extern void radix__local_flush_tlb_mm(struct mm_struct 
*mm); extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid); +extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__tlb_flush(struct mmu_gather *tlb); #ifdef CONFIG_SMP extern void radix__flush_tlb_mm(struct mm_struct *mm); extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid); +extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); #else #define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm) #define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr) #define radix___flush_tlb_page(mm,addr,p,i) radix___local_flush_tlb_page(mm,addr,p,i) +#define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr) #endif #endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index d98424ae356c..96e5769b18b0 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -72,5 +72,19 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) #define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) #endif /* CONFIG_SMP */ +/* + * flush the page walk cache for the address + */ +static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long address) +{ + /* + * Flush the page table walk cache on freeing a page table. We already + * have marked the upper/higher level page table entry none by now. + * So it is safe to flush PWC here. 
+ */ + if (!radix_enabled()) + return; + radix__flush_tlb_pwc(tlb, address); +} #endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */ diff --git a/arch/powerpc/include/asm/book3s/pgalloc.h b/arch/powerpc/include/asm/book3s/pgalloc.h index 54f591e9572e..c0a69ae92256 100644 --- a/arch/powerpc/include/asm/book3s/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/pgalloc.h @@ -4,11 +4,6 @@ #include extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -static inline void tlb_flush_pgtable(struct mmu_gather *tlb, - unsigned long address) -{ - -} #ifdef CONFIG_PPC64 #include diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h index 127ab23e1f6c..078155fa1189 100644 --- a/arch/powerpc/include/asm/mutex.h +++ b/arch/powerpc/include/asm/mutex.h @@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1)) return 1; return 0; } diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 069369f6414b..897d2e1c8a9b 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -57,8 +57,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -88,7 +87,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -190,8 +189,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index ee09e99097f0..9bd87f269d6d 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -71,10 +71,8 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, bool *is_thp, unsigned *shift) { - if (!arch_irqs_disabled()) { - pr_info("%s called with irq enabled\n", __func__); - dump_stack(); - } + VM_WARN(!arch_irqs_disabled(), + "%s called with irq enabled\n", __func__); return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift); } diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 1d035c1cc889..49cd8760aa7c 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -174,6 +174,8 @@ #define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1fffff #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 #define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1fffff +#define PPC_INST_MFVSRD 
0x7c000066 +#define PPC_INST_MTVSRD 0x7c000166 #define PPC_INST_SLBFEE 0x7c0007a7 #define PPC_INST_STRING 0x7c00042a @@ -188,6 +190,8 @@ #define PPC_INST_WAIT 0x7c00007c #define PPC_INST_TLBIVAX 0x7c000624 #define PPC_INST_TLBSRX_DOT 0x7c0006a5 +#define PPC_INST_VPMSUMW 0x10000488 +#define PPC_INST_VPMSUMD 0x100004c8 #define PPC_INST_XXLOR 0xf0000510 #define PPC_INST_XXSWAPD 0xf0000250 #define PPC_INST_XVCPSGNDP 0xf0000780 @@ -359,6 +363,14 @@ VSX_XX1((s), a, b)) #define LXVD2X(s, a, b) stringify_in_c(.long PPC_INST_LXVD2X | \ VSX_XX1((s), a, b)) +#define MFVRD(a, t) stringify_in_c(.long PPC_INST_MFVSRD | \ + VSX_XX1((t)+32, a, R0)) +#define MTVRD(t, a) stringify_in_c(.long PPC_INST_MTVSRD | \ + VSX_XX1((t)+32, a, R0)) +#define VPMSUMW(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMW | \ + VSX_XX3((t), a, b)) +#define VPMSUMD(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMD | \ + VSX_XX3((t), a, b)) #define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \ VSX_XX3((t), a, b)) #define XXSWAPD(t, a) stringify_in_c(.long PPC_INST_XXSWAPD | \ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 2b31632376a5..051af612a7e1 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -286,6 +286,9 @@ n: #endif +#define FUNC_START(name) _GLOBAL(name) +#define FUNC_END(name) + /* * LOAD_REG_IMMEDIATE(rn, expr) * Loads the value of the constant expression 'expr' into register 'rn' diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 2714a3b81d24..d70101e1e25c 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -642,13 +642,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (pe->type & EEH_PE_VF) { eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_lock_rescan_remove(); pci_hp_remove_devices(bus); pci_unlock_rescan_remove(); } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); + eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } /* @@ -692,10 +691,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, */ edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - if (pe->type & EEH_PE_VF) + if (pe->type & EEH_PE_VF) { eeh_add_virt_device(edev, NULL); - else + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_hp_add_devices(bus); + } } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4c9440629128..8bcc1b457115 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1399,11 +1399,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ mtlr r10 -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) andi. 
r10,r12,MSR_RI /* check for unrecoverable exception */ +BEGIN_MMU_FTR_SECTION beq- 2f +FTR_SECTION_ELSE + b 2f +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX) .machine push .machine "power4" diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 12e48d56f771..3963f0b68d52 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -38,6 +38,18 @@ EXPORT_SYMBOL(ioread16); EXPORT_SYMBOL(ioread16be); EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); +#ifdef __powerpc64__ +u64 ioread64(void __iomem *addr) +{ + return readq(addr); +} +u64 ioread64be(void __iomem *addr) +{ + return readq_be(addr); +} +EXPORT_SYMBOL(ioread64); +EXPORT_SYMBOL(ioread64be); +#endif /* __powerpc64__ */ void iowrite8(u8 val, void __iomem *addr) { @@ -64,6 +76,18 @@ EXPORT_SYMBOL(iowrite16); EXPORT_SYMBOL(iowrite16be); EXPORT_SYMBOL(iowrite32); EXPORT_SYMBOL(iowrite32be); +#ifdef __powerpc64__ +void iowrite64(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} +void iowrite64be(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +EXPORT_SYMBOL(iowrite64); +EXPORT_SYMBOL(iowrite64be); +#endif /* __powerpc64__ */ /* * These are the "repeat read/write" functions. Note the diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 856f9a7944cd..64174bf95611 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -444,7 +444,8 @@ static int nvram_pstore_write(enum pstore_type_id type, */ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, int *count, struct timespec *time, char **buf, - bool *compressed, struct pstore_info *psi) + bool *compressed, ssize_t *ecc_notice_size, + struct pstore_info *psi) { struct oops_log_info *oops_hdr; unsigned int err_type, id_no, size = 0; @@ -545,6 +546,7 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, return -ENOMEM; kfree(buff); + *ecc_notice_size = 0; if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) *compressed = true; else diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 3759df52bd67..a5ae49a2dcc4 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -47,7 +47,6 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); - pci_io_base = ISA_IO_BASE; /* For now, override phys_mem_access_prot. If we need it,g * later, we may move that initialization to each ppc_md */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e2f12cbcade9..0b93893424f5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1505,6 +1505,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.regs = regs - 1; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * Clear any transactional state, we're exec()ing. The cause is + * not important as there will never be a recheckpoint so it's not + * user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); +#endif + memset(regs->gpr, 0, sizeof(regs->gpr)); regs->ctr = 0; regs->link = 0; diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index bf8f34a58670..b7019b559ddb 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim) std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) - /* We need to setup MSR for VSX register save instructions. Here we - * also clear the MSR RI since when we do the treclaim, we won't have a - * valid kernel pointer for a while. 
We clear RI here as it avoids - * adding another mtmsr closer to the treclaim. This makes the region - * maked as non-recoverable wider than it needs to be but it saves on - * inserting another mtmsrd later. - */ + /* We need to setup MSR for VSX register save instructions. */ mfmsr r14 mr r15, r14 ori r15, r15, MSR_FP - li r16, MSR_RI + li r16, 0 ori r16, r16, MSR_EE /* IRQs hard off */ andc r15, r15, r16 oris r15, r15, MSR_VEC@h @@ -176,7 +170,17 @@ dont_backup_fp: 1: tdeqi r6, 0 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 - /* The moment we treclaim, ALL of our GPRs will switch + /* Clear MSR RI since we are about to change r1, EE is already off. */ + li r4, 0 + mtmsrd r4, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + * + * The moment we treclaim, ALL of our GPRs will switch * to user register state. (FPRs, CCR etc. also!) * Use an sprg and a tm_scratch in the PACA to shuffle. */ @@ -197,6 +201,11 @@ dont_backup_fp: /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + + /* Reset MSR RI so we can take SLB faults again */ + li r11, MSR_RI + mtmsrd r11, 1 + mfspr r11, SPRN_PPR HMT_MEDIUM @@ -397,11 +406,6 @@ restore_gprs: ld r5, THREAD_TM_DSCR(r3) ld r6, THREAD_TM_PPR(r3) - /* Clear the MSR RI since we are about to change R1. EE is already off - */ - li r4, 0 - mtmsrd r4, 1 - REST_GPR(0, r7) /* GPR0 */ REST_2GPRS(2, r7) /* GPR2-3 */ REST_GPR(4, r7) /* GPR4 */ @@ -439,10 +443,33 @@ restore_gprs: ld r6, _CCR(r7) mtcr r6 - REST_GPR(1, r7) /* GPR1 */ - REST_GPR(5, r7) /* GPR5-7 */ REST_GPR(6, r7) - ld r7, GPR7(r7) + + /* + * Store r1 and r5 on the stack so that we can access them + * after we clear MSR RI. + */ + + REST_GPR(5, r7) + std r5, -8(r1) + ld r5, GPR1(r7) + std r5, -16(r1) + + REST_GPR(7, r7) + + /* Clear MSR RI since we are about to change r1. EE is already off */ + li r5, 0 + mtmsrd r5, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + */ + + ld r5, -8(r1) + ld r1, -16(r1) /* Commit register state as checkpointed state: */ TRECHKPT diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 6527882ce05e..bb0354222b11 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -75,7 +75,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, } ret = 0; - *flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0); + *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0); if (unlikely(*flt & VM_FAULT_ERROR)) { if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index a67c6d781c52..a4db22f65021 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -429,7 +429,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { if (fault & VM_FAULT_SIGSEGV) goto bad_area; diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 40e05e7f43de..f8a871a72985 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -316,8 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> hit\n"); /* Update the HPTE */ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & - ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N | + ~(HPTE_R_PPP | HPTE_R_N)) | + (newpp & (HPTE_R_PPP | HPTE_R_N | HPTE_R_C))); } native_unlock_hpte(hptep); @@ -385,8 +385,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, /* Update the HPTE */ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & - ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N))); + ~(HPTE_R_PPP | HPTE_R_N)) | + (newpp & (HPTE_R_PPP | HPTE_R_N))); /* * Ensure it is out of the tlb too. Bolted entries base and * actual page size will be same. diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b2740c67e172..2971ea18c768 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -201,9 +201,8 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) /* * We can't allow hardware to update hpte bits. Hence always * set 'R' bit and set 'C' if it is a write fault - * Memory coherence is always enabled */ - rflags |= HPTE_R_R | HPTE_R_M; + rflags |= HPTE_R_R; if (pteflags & _PAGE_DIRTY) rflags |= HPTE_R_C; @@ -213,10 +212,15 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) rflags |= HPTE_R_I; - if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT) + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT) rflags |= (HPTE_R_I | HPTE_R_G); - if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) - rflags |= (HPTE_R_I | HPTE_R_W); + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) + rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); + else + /* + * Add memory coherence if cache inhibited is not set + */ + rflags |= HPTE_R_M; return rflags; } @@ -918,6 +922,10 @@ void __init hash__early_init_mmu(void) vmemmap = (struct page *)H_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. 
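Since the htab_convert_pte_flags() hunk above is easy to misread in patch form, here is a compact sketch of the cache-control mapping it ends up implementing; this is illustrative only, not the kernel function, and it reuses the HPTE_R_* and _PAGE_* constants from the hunks above. The if/else-if cascade guarantees that exactly one translation fires, and HPTE_R_M (memory coherence) is now ORed in only when the mapping is not cache-inhibited, with W|I|M being the architected strong-access-ordering encoding.

/* Sketch of the reworked cache-control translation (illustrative only). */
static unsigned long cache_ctl_to_hpte_bits(unsigned long pteflags)
{
	unsigned long rflags = 0;

	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT)
		rflags |= HPTE_R_I;			/* cache inhibited */
	else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
		rflags |= (HPTE_R_I | HPTE_R_G);	/* inhibited and guarded */
	else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
		rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); /* strong access ordering */
	else
		rflags |= HPTE_R_M;			/* normal coherent memory */

	return rflags;
}

The old code ran the tests independently and set HPTE_R_M unconditionally, so a cache-inhibited mapping could end up with the coherence bit as well; that is what the hunk above removes.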
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 5aac1a3f86cd..119d18611500 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -73,7 +73,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, cachep = PGT_CACHE(pdshift - pshift); #endif - new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT); + new = kmem_cache_zalloc(cachep, GFP_KERNEL); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 227b2a6c4544..196222227e82 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -65,7 +65,7 @@ static int radix__init_new_context(struct mm_struct *mm, int index) /* * set the process table entry, */ - rts_field = 3ull << PPC_BITLSHIFT(2); + rts_field = radix__get_tree_size(); process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE); return 0; } diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index c939e6e57a9e..7931e1496f0d 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -160,9 +160,8 @@ redo: process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. - * we support 52 bits, hence 52-28 = 24, 11000 */ - rts_field = 3ull << PPC_BITLSHIFT(2); + rts_field = radix__get_tree_size(); process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE); /* * Fill in the partition table. We are suppose to use effective address @@ -176,10 +175,8 @@ redo: static void __init radix_init_partition_table(void) { unsigned long rts_field; - /* - * we support 52 bits, hence 52-28 = 24, 11000 - */ - rts_field = 3ull << PPC_BITLSHIFT(2); + + rts_field = radix__get_tree_size(); BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); @@ -331,6 +328,11 @@ void __init radix__early_init_mmu(void) __vmalloc_end = RADIX_VMALLOC_END; vmemmap = (struct page *)RADIX_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; + +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* * For now radix also use the same frag size */ diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index bf7bf32b54f8..7f922f557936 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -84,7 +84,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add pte_t *pte; if (slab_is_available()) { - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); } else { pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); if (pte) @@ -97,7 +97,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *ptepage; - gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO; ptepage = alloc_pages(flags, 0); if (!ptepage) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e009e0604a8a..f5e8d4edb808 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -350,8 +350,7 @@ static pte_t *get_from_cache(struct mm_struct *mm) static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) { void *ret = NULL; - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK 
| __GFP_ZERO); if (!page) return NULL; if (!kernel && !pgtable_page_ctor(page)) { diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 54efba2fd66e..ab2f60e812e2 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -18,16 +18,20 @@ static DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbiel_pid(unsigned long pid, int set) +#define RIC_FLUSH_TLB 0 +#define RIC_FLUSH_PWC 1 +#define RIC_FLUSH_ALL 2 + +static inline void __tlbiel_pid(unsigned long pid, int set, + unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rb |= set << PPC_BITLSHIFT(51); rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 2; /* invalidate all the caches */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" @@ -39,25 +43,24 @@ static inline void __tlbiel_pid(unsigned long pid, int set) /* * We use 128 set in radix mode and 256 set in hpt mode. */ -static inline void _tlbiel_pid(unsigned long pid) +static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) { int set; for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) { - __tlbiel_pid(pid, set); + __tlbiel_pid(pid, set, ric); } return; } -static inline void _tlbie_pid(unsigned long pid) +static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 2; /* invalidate all the caches */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" @@ -67,16 +70,15 @@ static inline void _tlbie_pid(unsigned long pid) } static inline void _tlbiel_va(unsigned long va, unsigned long pid, - unsigned long ap) + unsigned long ap, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = va & ~(PPC_BITMASK(52, 63)); rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 0; /* no cluster flush yet */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" @@ -86,16 +88,15 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, } static inline void _tlbie_va(unsigned long va, unsigned long pid, - unsigned long ap) + unsigned long ap, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = va & ~(PPC_BITMASK(52, 63)); rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 0; /* no cluster flush yet */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" @@ -122,11 +123,26 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm) preempt_disable(); pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - _tlbiel_pid(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_tlb_mm); +void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + unsigned long pid; + struct mm_struct *mm = tlb->mm; + + preempt_disable(); + + pid = mm->context.id; + if (pid != MMU_NO_CONTEXT) + _tlbiel_pid(pid, RIC_FLUSH_PWC); + + preempt_enable(); +} +EXPORT_SYMBOL(radix__local_flush_tlb_pwc); + void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int 
nid) { @@ -135,7 +151,7 @@ void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, preempt_disable(); pid = mm ? mm->context.id : 0; if (pid != MMU_NO_CONTEXT) - _tlbiel_va(vmaddr, pid, ap); + _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); preempt_enable(); } @@ -172,16 +188,42 @@ void radix__flush_tlb_mm(struct mm_struct *mm) if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_pid(pid); + _tlbie_pid(pid, RIC_FLUSH_ALL); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else - _tlbiel_pid(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); no_context: preempt_enable(); } EXPORT_SYMBOL(radix__flush_tlb_mm); +void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + unsigned long pid; + struct mm_struct *mm = tlb->mm; + + preempt_disable(); + + pid = mm->context.id; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto no_context; + + if (!mm_is_core_local(mm)) { + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + raw_spin_lock(&native_tlbie_lock); + _tlbie_pid(pid, RIC_FLUSH_PWC); + if (lock_tlbie) + raw_spin_unlock(&native_tlbie_lock); + } else + _tlbiel_pid(pid, RIC_FLUSH_PWC); +no_context: + preempt_enable(); +} +EXPORT_SYMBOL(radix__flush_tlb_pwc); + void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { @@ -196,11 +238,11 @@ void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_va(vmaddr, pid, ap); + _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else - _tlbiel_va(vmaddr, pid, ap); + _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); bail: preempt_enable(); } @@ -224,7 +266,7 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_pid(0); + _tlbie_pid(0, RIC_FLUSH_ALL); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index 6e287f1294fa..e3257f24a8a1 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -137,7 +137,7 @@ config STB03xxx config PPC4xx_GPIO bool "PPC4xx GPIO support" depends on 40x - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help Enable gpiolib support for ppc40x based boards diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 5538e57c36c1..48fc18041ff6 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -273,7 +273,7 @@ config PPC44x_SIMPLE config PPC4xx_GPIO bool "PPC4xx GPIO support" depends on 44x - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help Enable gpiolib support for ppc440 based boards diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig index f09016f6b3a6..bf7ae5cbd07a 100644 --- a/arch/powerpc/platforms/512x/Kconfig +++ b/arch/powerpc/platforms/512x/Kconfig @@ -6,7 +6,6 @@ config PPC_MPC512x select IPIC select PPC_PCI_CHOICE select FSL_PCI if PCI - select ARCH_WANT_OPTIONAL_GPIOLIB select USB_EHCI_BIG_ENDIAN_MMIO if USB_EHCI_HCD select USB_EHCI_BIG_ENDIAN_DESC if USB_EHCI_HCD diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig index 2bdc8c862c46..4ef7f1cd05b7 100644 --- a/arch/powerpc/platforms/83xx/Kconfig +++ b/arch/powerpc/platforms/83xx/Kconfig @@ -116,7 +116,6 @@ endif # used for usb & gpio config PPC_MPC831x bool - select ARCH_WANT_OPTIONAL_GPIOLIB # used for math-emu 
config PPC_MPC832x @@ -125,9 +124,7 @@ config PPC_MPC832x # used for usb & gpio config PPC_MPC834x bool - select ARCH_WANT_OPTIONAL_GPIOLIB # used for usb & gpio config PPC_MPC837x bool - select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index e626461a63bd..df25a3ed489d 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -225,7 +225,7 @@ config GE_IMP3A select DEFAULT_UIMAGE select SWIOTLB select MMIO_NVRAM - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select GE_FPGA help This option enables support for the GE Intelligent Platforms IMP3A @@ -272,7 +272,7 @@ config CORENET_GENERIC select PPC_E500MC select PHYS_64BIT select SWIOTLB - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select GPIO_MPC8XXX select HAS_RAPIDIO select PPC_EPAPR_HV_PIC diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig index 1afd1e4a2dd2..3988f16e46c1 100644 --- a/arch/powerpc/platforms/86xx/Kconfig +++ b/arch/powerpc/platforms/86xx/Kconfig @@ -4,7 +4,6 @@ menuconfig PPC_86xx depends on 6xx select FSL_SOC select ALTIVEC - select ARCH_WANT_OPTIONAL_GPIOLIB help The Freescale E600 SoCs have 74xx cores. @@ -37,7 +36,7 @@ config GEF_PPC9A bool "GE PPC9A" select DEFAULT_UIMAGE select MMIO_NVRAM - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select GE_FPGA help This option enables support for the GE PPC9A. @@ -46,7 +45,7 @@ config GEF_SBC310 bool "GE SBC310" select DEFAULT_UIMAGE select MMIO_NVRAM - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select GE_FPGA help This option enables support for the GE SBC310. @@ -55,7 +54,7 @@ config GEF_SBC610 bool "GE SBC610" select DEFAULT_UIMAGE select MMIO_NVRAM - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select GE_FPGA select HAS_RAPIDIO help diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index 157250426b56..564d99bb2a26 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -109,7 +109,7 @@ config 8xx_COPYBACK config 8xx_GPIO bool "GPIO API Support" - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help Saying Y here will cause the ports on an MPC8xx processor to be used with the GPIO API. If you say N here, the kernel needs less memory. diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 46a3533d3acb..3663f71fd913 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -275,7 +275,7 @@ config TAU_AVERAGE config QE_GPIO bool "QE GPIO support" depends on QUICC_ENGINE - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help Say Y here if you're going to use hardware that connects to the QE GPIOs. @@ -285,7 +285,7 @@ config CPM2 depends on (FSL_SOC_BOOKE && PPC32) || 8260 select CPM select PPC_PCI_CHOICE - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help The CPM2 (Communications Processor Module) is a coprocessor on embedded CPUs made by Freescale. Selecting this option means that @@ -324,7 +324,7 @@ config OF_RTC config SIMPLE_GPIO bool "Support for simple, memory-mapped GPIO controllers" depends on PPC - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help Say Y here to support simple, memory-mapped GPIO controllers. 
These are usually BCSRs used to control board's switches, LEDs, @@ -334,7 +334,7 @@ config SIMPLE_GPIO config MCU_MPC8349EMITX bool "MPC8349E-mITX MCU driver" depends on I2C=y && PPC_83xx - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help Say Y here to enable soft power-off functionality on the Freescale boards with the MPC8349E-mITX-compatible MCU chips. This driver will diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c index 82607d621aca..88301e53f085 100644 --- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -85,61 +85,57 @@ static void spu_gov_cancel_work(struct spu_gov_info_struct *info) cancel_delayed_work_sync(&info->work); } -static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event) +static int spu_gov_start(struct cpufreq_policy *policy) { unsigned int cpu = policy->cpu; - struct spu_gov_info_struct *info, *affected_info; + struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu); + struct spu_gov_info_struct *affected_info; int i; - int ret = 0; - info = &per_cpu(spu_gov_info, cpu); - - switch (event) { - case CPUFREQ_GOV_START: - if (!cpu_online(cpu)) { - printk(KERN_ERR "cpu %d is not online\n", cpu); - ret = -EINVAL; - break; - } + if (!cpu_online(cpu)) { + printk(KERN_ERR "cpu %d is not online\n", cpu); + return -EINVAL; + } - if (!policy->cur) { - printk(KERN_ERR "no cpu specified in policy\n"); - ret = -EINVAL; - break; - } + if (!policy->cur) { + printk(KERN_ERR "no cpu specified in policy\n"); + return -EINVAL; + } - /* initialize spu_gov_info for all affected cpus */ - for_each_cpu(i, policy->cpus) { - affected_info = &per_cpu(spu_gov_info, i); - affected_info->policy = policy; - } + /* initialize spu_gov_info for all affected cpus */ + for_each_cpu(i, policy->cpus) { + affected_info = &per_cpu(spu_gov_info, i); + affected_info->policy = policy; + } - info->poll_int = POLL_TIME; + info->poll_int = POLL_TIME; - /* setup timer */ - spu_gov_init_work(info); + /* setup timer */ + spu_gov_init_work(info); - break; + return 0; +} - case CPUFREQ_GOV_STOP: - /* cancel timer */ - spu_gov_cancel_work(info); +static void spu_gov_stop(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu); + int i; - /* clean spu_gov_info for all affected cpus */ - for_each_cpu (i, policy->cpus) { - info = &per_cpu(spu_gov_info, i); - info->policy = NULL; - } + /* cancel timer */ + spu_gov_cancel_work(info); - break; + /* clean spu_gov_info for all affected cpus */ + for_each_cpu (i, policy->cpus) { + info = &per_cpu(spu_gov_info, i); + info->policy = NULL; } - - return ret; } static struct cpufreq_governor spu_governor = { .name = "spudemand", - .governor = spu_gov_govern, + .start = spu_gov_start, + .stop = spu_gov_stop, .owner = THIS_MODULE, }; diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ff75d70f7285..f9af6461521a 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -223,7 +223,6 @@ static int axon_ram_probe(struct platform_device *device) bank->disk->first_minor = azfs_minor; bank->disk->fops = &axon_ram_devops; bank->disk->private_data = bank; - bank->disk->driverfs_dev = &device->dev; sprintf(bank->disk->disk_name, "%s%d", AXON_RAM_DEVICE_NAME, axon_ram_bank_id); @@ -238,7 +237,7 @@ static int axon_ram_probe(struct platform_device *device) set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT); 
blk_queue_make_request(bank->disk->queue, axon_ram_make_request); blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE); - add_disk(bank->disk); + device_add_disk(&device->dev, bank->disk); bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0); if (bank->irq_id == NO_IRQ) { diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a8c259059adf..9e607bf2d640 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -72,6 +72,7 @@ config S390 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_KCOV select ARCH_HAS_SG_CHAIN select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK @@ -163,6 +164,7 @@ config S390 select NO_BOOTMEM select OLD_SIGACTION select OLD_SIGSUSPEND3 + select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select TTY select VIRT_CPU_ACCOUNTING @@ -477,6 +479,9 @@ config SCHED_MC config SCHED_BOOK def_bool n +config SCHED_DRAWER + def_bool n + config SCHED_TOPOLOGY def_bool y prompt "Topology scheduler support" @@ -484,6 +489,7 @@ config SCHED_TOPOLOGY select SCHED_SMT select SCHED_MC select SCHED_BOOK + select SCHED_DRAWER help Topology scheduler support improves the CPU scheduler's decision making when dealing with machines that have multi-threading, @@ -605,16 +611,6 @@ config PCI_NR_FUNCTIONS This allows you to specify the maximum number of PCI functions which this kernel will support. -config PCI_NR_MSI - int "Maximum number of MSI interrupts (64-32768)" - range 64 32768 - default "256" - help - This defines the number of virtual interrupts the kernel will - provide for MSI interrupts. If you configure your system to have - too few drivers will fail to allocate MSI interrupts for all - PCI devices. - source "drivers/pci/Kconfig" endif # PCI diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 1dd210347e12..98ec652cc332 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -4,6 +4,8 @@ # create a compressed vmlinux image from the original vmlinux # +KCOV_INSTRUMENT := n + targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 targets += misc.o piggy.o sizes.h head.o diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index d5ec71b2ed02..889ea3450210 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -678,6 +678,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f46a35115d2d..1bcfd764910a 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -616,6 +616,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index ba0f2a58b8cd..13ff090139c8 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -615,6 +615,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m 
+CONFIG_CRYPTO_CRC32_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 7f0b7cda6259..d1033de4c4ee 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -9,3 +9,6 @@ obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o +obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o + +crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 7554a8bb2adc..2ea18b050309 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -44,7 +45,7 @@ struct s390_aes_ctx { long dec; int key_len; union { - struct crypto_blkcipher *blk; + struct crypto_skcipher *blk; struct crypto_cipher *cip; } fallback; }; @@ -63,7 +64,7 @@ struct s390_xts_ctx { long enc; long dec; int key_len; - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; }; /* @@ -237,16 +238,16 @@ static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key, struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); unsigned int ret; - sctx->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - sctx->fallback.blk->base.crt_flags |= (tfm->crt_flags & - CRYPTO_TFM_REQ_MASK); + crypto_skcipher_clear_flags(sctx->fallback.blk, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_skcipher_setkey(sctx->fallback.blk, key, len); + + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= crypto_skcipher_get_flags(sctx->fallback.blk) & + CRYPTO_TFM_RES_MASK; - ret = crypto_blkcipher_setkey(sctx->fallback.blk, key, len); - if (ret) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= (sctx->fallback.blk->base.crt_flags & - CRYPTO_TFM_RES_MASK); - } return ret; } @@ -255,15 +256,17 @@ static int fallback_blk_dec(struct blkcipher_desc *desc, unsigned int nbytes) { unsigned int ret; - struct crypto_blkcipher *tfm; - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm = desc->tfm; + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk); - tfm = desc->tfm; - desc->tfm = sctx->fallback.blk; + skcipher_request_set_tfm(req, sctx->fallback.blk); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); + ret = crypto_skcipher_decrypt(req); - desc->tfm = tfm; + skcipher_request_zero(req); return ret; } @@ -272,15 +275,15 @@ static int fallback_blk_enc(struct blkcipher_desc *desc, unsigned int nbytes) { unsigned int ret; - struct crypto_blkcipher *tfm; - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm = desc->tfm; + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk); - tfm = desc->tfm; - desc->tfm = sctx->fallback.blk; + skcipher_request_set_tfm(req, sctx->fallback.blk); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); - - desc->tfm = tfm; + ret = crypto_skcipher_encrypt(req); return 
ret; } @@ -370,8 +373,9 @@ static int fallback_init_blk(struct crypto_tfm *tfm) const char *name = tfm->__crt_alg->cra_name; struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - sctx->fallback.blk = crypto_alloc_blkcipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + sctx->fallback.blk = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(sctx->fallback.blk)) { pr_err("Allocating AES fallback algorithm %s failed\n", @@ -386,8 +390,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - crypto_free_blkcipher(sctx->fallback.blk); - sctx->fallback.blk = NULL; + crypto_free_skcipher(sctx->fallback.blk); } static struct crypto_alg ecb_aes_alg = { @@ -536,16 +539,16 @@ static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key, struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); unsigned int ret; - xts_ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - xts_ctx->fallback->base.crt_flags |= (tfm->crt_flags & - CRYPTO_TFM_REQ_MASK); + crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len); + + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= crypto_skcipher_get_flags(xts_ctx->fallback) & + CRYPTO_TFM_RES_MASK; - ret = crypto_blkcipher_setkey(xts_ctx->fallback, key, len); - if (ret) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= (xts_ctx->fallback->base.crt_flags & - CRYPTO_TFM_RES_MASK); - } return ret; } @@ -553,16 +556,18 @@ static int xts_fallback_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); - struct crypto_blkcipher *tfm; + struct crypto_blkcipher *tfm = desc->tfm; + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback); unsigned int ret; - tfm = desc->tfm; - desc->tfm = xts_ctx->fallback; + skcipher_request_set_tfm(req, xts_ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); + ret = crypto_skcipher_decrypt(req); - desc->tfm = tfm; + skcipher_request_zero(req); return ret; } @@ -570,16 +575,18 @@ static int xts_fallback_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); - struct crypto_blkcipher *tfm; + struct crypto_blkcipher *tfm = desc->tfm; + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback); unsigned int ret; - tfm = desc->tfm; - desc->tfm = xts_ctx->fallback; + skcipher_request_set_tfm(req, xts_ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); + ret = crypto_skcipher_encrypt(req); - desc->tfm = tfm; + skcipher_request_zero(req); return ret; } @@ -700,8 +707,9 @@ static int xts_fallback_init(struct crypto_tfm *tfm) const char *name = tfm->__crt_alg->cra_name; struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); - xts_ctx->fallback = crypto_alloc_blkcipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + 
xts_ctx->fallback = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(xts_ctx->fallback)) { pr_err("Allocating XTS fallback algorithm %s failed\n", @@ -715,8 +723,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm) { struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); - crypto_free_blkcipher(xts_ctx->fallback); - xts_ctx->fallback = NULL; + crypto_free_skcipher(xts_ctx->fallback); } static struct crypto_alg xts_aes_alg = { diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c new file mode 100644 index 000000000000..577ae1d4ae89 --- /dev/null +++ b/arch/s390/crypto/crc32-vx.c @@ -0,0 +1,310 @@ +/* + * Crypto-API module for CRC-32 algorithms implemented with the + * z/Architecture Vector Extension Facility. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ +#define KMSG_COMPONENT "crc32-vx" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include + + +#define CRC32_BLOCK_SIZE 1 +#define CRC32_DIGEST_SIZE 4 + +#define VX_MIN_LEN 64 +#define VX_ALIGNMENT 16L +#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) + +struct crc_ctx { + u32 key; +}; + +struct crc_desc_ctx { + u32 crc; +}; + +/* Prototypes for functions in assembly files */ +u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); +u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size); +u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); + +/* + * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension + * + * Creates a function to perform a particular CRC-32 computation. Depending + * on the message buffer, the hardware-accelerated or software implementation + * is used. Note that the message buffer is aligned to improve fetch + * operations of VECTOR LOAD MULTIPLE instructions. 
+ * + */ +#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ + static u32 __pure ___fname(u32 crc, \ + unsigned char const *data, size_t datalen) \ + { \ + struct kernel_fpu vxstate; \ + unsigned long prealign, aligned, remaining; \ + \ + if ((unsigned long)data & VX_ALIGN_MASK) { \ + prealign = VX_ALIGNMENT - \ + ((unsigned long)data & VX_ALIGN_MASK); \ + datalen -= prealign; \ + crc = ___crc32_sw(crc, data, prealign); \ + data = (void *)((unsigned long)data + prealign); \ + } \ + \ + if (datalen < VX_MIN_LEN) \ + return ___crc32_sw(crc, data, datalen); \ + \ + aligned = datalen & ~VX_ALIGN_MASK; \ + remaining = datalen & VX_ALIGN_MASK; \ + \ + kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ + crc = ___crc32_vx(crc, data, aligned); \ + kernel_fpu_end(&vxstate); \ + \ + if (remaining) \ + crc = ___crc32_sw(crc, data + aligned, remaining); \ + \ + return crc; \ + } + +DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le) +DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be) +DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le) + + +static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm) +{ + struct crc_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = 0; + return 0; +} + +static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm) +{ + struct crc_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = ~0; + return 0; +} + +static int crc32_vx_init(struct shash_desc *desc) +{ + struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm); + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = mctx->key; + return 0; +} + +static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, + unsigned int newkeylen) +{ + struct crc_ctx *mctx = crypto_shash_ctx(tfm); + + if (newkeylen != sizeof(mctx->key)) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + mctx->key = le32_to_cpu(*(__le32 *)newkey); + return 0; +} + +static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, + unsigned int newkeylen) +{ + struct crc_ctx *mctx = crypto_shash_ctx(tfm); + + if (newkeylen != sizeof(mctx->key)) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + mctx->key = be32_to_cpu(*(__be32 *)newkey); + return 0; +} + +static int crc32le_vx_final(struct shash_desc *desc, u8 *out) +{ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__le32 *)out = cpu_to_le32p(&ctx->crc); + return 0; +} + +static int crc32be_vx_final(struct shash_desc *desc, u8 *out) +{ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__be32 *)out = cpu_to_be32p(&ctx->crc); + return 0; +} + +static int crc32c_vx_final(struct shash_desc *desc, u8 *out) +{ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + /* + * Perform a final XOR with 0xFFFFFFFF to be in sync + * with the generic crc32c shash implementation. + */ + *(__le32 *)out = ~cpu_to_le32p(&ctx->crc); + return 0; +} + +static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len, + u8 *out) +{ + *(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len)); + return 0; +} + +static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len, + u8 *out) +{ + *(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len)); + return 0; +} + +static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len, + u8 *out) +{ + /* + * Perform a final XOR with 0xFFFFFFFF to be in sync + * with the generic crc32c shash implementation. 
+ */ + *(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len)); + return 0; +} + + +#define CRC32_VX_FINUP(alg, func) \ + static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \ + unsigned int datalen, u8 *out) \ + { \ + return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \ + data, datalen, out); \ + } + +CRC32_VX_FINUP(crc32le, crc32_le_vx) +CRC32_VX_FINUP(crc32be, crc32_be_vx) +CRC32_VX_FINUP(crc32c, crc32c_le_vx) + +#define CRC32_VX_DIGEST(alg, func) \ + static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \ + unsigned int len, u8 *out) \ + { \ + return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \ + data, len, out); \ + } + +CRC32_VX_DIGEST(crc32le, crc32_le_vx) +CRC32_VX_DIGEST(crc32be, crc32_be_vx) +CRC32_VX_DIGEST(crc32c, crc32c_le_vx) + +#define CRC32_VX_UPDATE(alg, func) \ + static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \ + unsigned int datalen) \ + { \ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); \ + ctx->crc = func(ctx->crc, data, datalen); \ + return 0; \ + } + +CRC32_VX_UPDATE(crc32le, crc32_le_vx) +CRC32_VX_UPDATE(crc32be, crc32_be_vx) +CRC32_VX_UPDATE(crc32c, crc32c_le_vx) + + +static struct shash_alg crc32_vx_algs[] = { + /* CRC-32 LE */ + { + .init = crc32_vx_init, + .setkey = crc32_vx_setkey, + .update = crc32le_vx_update, + .final = crc32le_vx_final, + .finup = crc32le_vx_finup, + .digest = crc32le_vx_digest, + .descsize = sizeof(struct crc_desc_ctx), + .digestsize = CRC32_DIGEST_SIZE, + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-vx", + .cra_priority = 200, + .cra_blocksize = CRC32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crc_ctx), + .cra_module = THIS_MODULE, + .cra_init = crc32_vx_cra_init_zero, + }, + }, + /* CRC-32 BE */ + { + .init = crc32_vx_init, + .setkey = crc32be_vx_setkey, + .update = crc32be_vx_update, + .final = crc32be_vx_final, + .finup = crc32be_vx_finup, + .digest = crc32be_vx_digest, + .descsize = sizeof(struct crc_desc_ctx), + .digestsize = CRC32_DIGEST_SIZE, + .base = { + .cra_name = "crc32be", + .cra_driver_name = "crc32be-vx", + .cra_priority = 200, + .cra_blocksize = CRC32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crc_ctx), + .cra_module = THIS_MODULE, + .cra_init = crc32_vx_cra_init_zero, + }, + }, + /* CRC-32C LE */ + { + .init = crc32_vx_init, + .setkey = crc32_vx_setkey, + .update = crc32c_vx_update, + .final = crc32c_vx_final, + .finup = crc32c_vx_finup, + .digest = crc32c_vx_digest, + .descsize = sizeof(struct crc_desc_ctx), + .digestsize = CRC32_DIGEST_SIZE, + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-vx", + .cra_priority = 200, + .cra_blocksize = CRC32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crc_ctx), + .cra_module = THIS_MODULE, + .cra_init = crc32_vx_cra_init_invert, + }, + }, +}; + + +static int __init crc_vx_mod_init(void) +{ + return crypto_register_shashes(crc32_vx_algs, + ARRAY_SIZE(crc32_vx_algs)); +} + +static void __exit crc_vx_mod_exit(void) +{ + crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs)); +} + +module_cpu_feature_match(VXRS, crc_vx_mod_init); +module_exit(crc_vx_mod_exit); + +MODULE_AUTHOR("Hendrik Brueckner "); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS_CRYPTO("crc32"); +MODULE_ALIAS_CRYPTO("crc32-vx"); +MODULE_ALIAS_CRYPTO("crc32c"); +MODULE_ALIAS_CRYPTO("crc32c-vx"); diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S new file mode 100644 index 000000000000..8013989cd2e5 --- /dev/null +++ b/arch/s390/crypto/crc32be-vx.S @@ -0,0 +1,207 @@ +/* + * 
Hardware-accelerated CRC-32 variants for Linux on z Systems + * + * Use the z/Architecture Vector Extension Facility to accelerate the + * computing of CRC-32 checksums. + * + * This CRC-32 implementation algorithm processes the most-significant + * bit first (BE). + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ + +#include +#include + +/* Vector register range containing CRC-32 constants */ +#define CONST_R1R2 %v9 +#define CONST_R3R4 %v10 +#define CONST_R5 %v11 +#define CONST_R6 %v12 +#define CONST_RU_POLY %v13 +#define CONST_CRC_POLY %v14 + +.data +.align 8 + +/* + * The CRC-32 constant block contains reduction constants to fold and + * process particular chunks of the input data stream in parallel. + * + * For the CRC-32 variants, the constants are precomputed according to + * these definitions: + * + * R1 = x4*128+64 mod P(x) + * R2 = x4*128 mod P(x) + * R3 = x128+64 mod P(x) + * R4 = x128 mod P(x) + * R5 = x96 mod P(x) + * R6 = x64 mod P(x) + * + * Barrett reduction constant, u, is defined as floor(x**64 / P(x)). + * + * where P(x) is the polynomial in the normal domain and the P'(x) is the + * polynomial in the reversed (bitreflected) domain. + * + * Note that the constant definitions below are extended in order to compute + * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction. + * The rightmost doubleword can be 0 to prevent contribution to the result or + * can be multiplied by 1 to perform an XOR without the need for a separate + * VECTOR EXCLUSIVE OR instruction. + * + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: + * + * P(x) = 0x04C11DB7 + * P'(x) = 0xEDB88320 + */ + +.Lconstants_CRC_32_BE: + .quad 0x08833794c, 0x0e6228b11 # R1, R2 + .quad 0x0c5b9cd4c, 0x0e8a45605 # R3, R4 + .quad 0x0f200aa66, 1 << 32 # R5, x32 + .quad 0x0490d678d, 1 # R6, 1 + .quad 0x104d101df, 0 # u + .quad 0x104C11DB7, 0 # P(x) + +.previous + +.text +/* + * The CRC-32 function(s) use these calling conventions: + * + * Parameters: + * + * %r2: Initial CRC value, typically ~0; and final CRC (return) value. + * %r3: Input buffer pointer, performance might be improved if the + * buffer is on a doubleword boundary. + * %r4: Length of the buffer, must be 64 bytes or greater. + * + * Register usage: + * + * %r5: CRC-32 constant pool base pointer. + * V0: Initial CRC value and intermediate constants and results. + * V1..V4: Data for CRC computation. + * V5..V8: Next data chunks that are fetched from the input buffer. + * + * V9..V14: CRC-32 constants. + */ +ENTRY(crc32_be_vgfm_16) + /* Load CRC-32 constants */ + larl %r5,.Lconstants_CRC_32_BE + VLM CONST_R1R2,CONST_CRC_POLY,0,%r5 + + /* Load the initial CRC value into the leftmost word of V0. */ + VZERO %v0 + VLVGF %v0,%r2,0 + + /* Load a 64-byte data chunk and XOR with CRC */ + VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */ + VX %v1,%v0,%v1 /* V1 ^= CRC */ + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + /* Check remaining buffer size and jump to proper folding method */ + cghi %r4,64 + jl .Lless_than_64bytes + +.Lfold_64bytes_loop: + /* Load the next 64-byte data chunk into V5 to V8 */ + VLM %v5,%v8,0,%r3 + + /* + * Perform a GF(2) multiplication of the doublewords in V1 with + * the reduction constants in V0. The intermediate result is + * then folded (accumulated) with the next data chunk in V5 and + * stored in V1. Repeat this step for the register contents + * in V2, V3, and V4 respectively.
+ */
+ VGFMAG %v1,CONST_R1R2,%v1,%v5
+ VGFMAG %v2,CONST_R1R2,%v2,%v6
+ VGFMAG %v3,CONST_R1R2,%v3,%v7
+ VGFMAG %v4,CONST_R1R2,%v4,%v8
+
+ /* Adjust buffer pointer and length for next loop */
+ aghi %r3,64 /* BUF = BUF + 64 */
+ aghi %r4,-64 /* LEN = LEN - 64 */
+
+ cghi %r4,64
+ jnl .Lfold_64bytes_loop
+
+.Lless_than_64bytes:
+ /* Fold V1 to V4 into a single 128-bit value in V1 */
+ VGFMAG %v1,CONST_R3R4,%v1,%v2
+ VGFMAG %v1,CONST_R3R4,%v1,%v3
+ VGFMAG %v1,CONST_R3R4,%v1,%v4
+
+ /* Check whether to continue with 64-bit folding */
+ cghi %r4,16
+ jl .Lfinal_fold
+
+.Lfold_16bytes_loop:
+
+ VL %v2,0,,%r3 /* Load next data chunk */
+ VGFMAG %v1,CONST_R3R4,%v1,%v2 /* Fold next data chunk */
+
+ /* Adjust buffer pointer and size for folding next data chunk */
+ aghi %r3,16
+ aghi %r4,-16
+
+ /* Process remaining data chunks */
+ cghi %r4,16
+ jnl .Lfold_16bytes_loop
+
+.Lfinal_fold:
+ /*
+ * The R5 constant is used to fold a 128-bit value into a 96-bit value
+ * that is XORed with the next 96-bit input data chunk. To use a single
+ * VGFMG instruction, multiply the rightmost 64 bits with x^32 (1<<32) to
+ * form an intermediate 96-bit value (with appended zeros) which is then
+ * XORed with the intermediate reduction result.
+ */
+ VGFMG %v1,CONST_R5,%v1
+
+ /*
+ * Further reduce the remaining 96-bit value to a 64-bit value using a
+ * single VGFMG; the rightmost doubleword is multiplied with 0x1. The
+ * intermediate result is then XORed with the product of the leftmost
+ * doubleword with R6. The result is a 64-bit value and is subject to
+ * the Barret reduction.
+ */
+ VGFMG %v1,CONST_R6,%v1
+
+ /*
+ * The input values to the Barret reduction are the degree-63 polynomial
+ * in V1 (R(x)), degree-32 generator polynomial, and the reduction
+ * constant u. The Barret reduction result is the CRC value of R(x) mod
+ * P(x).
+ *
+ * The Barret reduction algorithm is defined as:
+ *
+ * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
+ * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
+ * 3. C(x) = R(x) XOR T2(x) mod x^32
+ *
+ * Note: To compensate for the division by x^32, use the vector unpack
+ * instruction to move the leftmost word into the leftmost doubleword
+ * of the vector register. The rightmost doubleword is multiplied
+ * with zero to not contribute to the intermediate results.
+ */
+
+ /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
+ VUPLLF %v2,%v1
+ VGFMG %v2,CONST_RU_POLY,%v2
+
+ /*
+ * Compute the GF(2) product of the CRC polynomial in V0 with T1(x) in
+ * V2 and XOR the intermediate result, T2(x), with the value in V1.
+ * The final result is in the rightmost word of V2.
+ */
+ VUPLLF %v2,%v2
+ VGFMAG %v2,CONST_CRC_POLY,%v2,%v1
+
+.Ldone:
+ VLGVF %r2,%v2,3
+ br %r14
+
+.previous
diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S
new file mode 100644
index 000000000000..17f2504c2633
--- /dev/null
+++ b/arch/s390/crypto/crc32le-vx.S
@@ -0,0 +1,268 @@
+/*
+ * Hardware-accelerated CRC-32 variants for Linux on z Systems
+ *
+ * Use the z/Architecture Vector Extension Facility to accelerate the
+ * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet
+ * and Castagnoli.
+ *
+ * This CRC-32 implementation algorithm is bitreflected and processes
+ * the least-significant bit first (Little-Endian).
+ *
+ * Copyright IBM Corp.
2015 + * Author(s): Hendrik Brueckner + */ + +#include +#include + +/* Vector register range containing CRC-32 constants */ +#define CONST_PERM_LE2BE %v9 +#define CONST_R2R1 %v10 +#define CONST_R4R3 %v11 +#define CONST_R5 %v12 +#define CONST_RU_POLY %v13 +#define CONST_CRC_POLY %v14 + +.data +.align 8 + +/* + * The CRC-32 constant block contains reduction constants to fold and + * process particular chunks of the input data stream in parallel. + * + * For the CRC-32 variants, the constants are precomputed according to + * these definitions: + * + * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1 + * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1 + * R3 = [(x128+32 mod P'(x) << 32)]' << 1 + * R4 = [(x128-32 mod P'(x) << 32)]' << 1 + * R5 = [(x64 mod P'(x) << 32)]' << 1 + * R6 = [(x32 mod P'(x) << 32)]' << 1 + * + * The bitreflected Barret reduction constant, u', is defined as + * the bit reversal of floor(x**64 / P(x)). + * + * where P(x) is the polynomial in the normal domain and the P'(x) is the + * polynomial in the reversed (bitreflected) domain. + * + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: + * + * P(x) = 0x04C11DB7 + * P'(x) = 0xEDB88320 + * + * CRC-32C (Castagnoli) polynomials: + * + * P(x) = 0x1EDC6F41 + * P'(x) = 0x82F63B78 + */ + +.Lconstants_CRC_32_LE: + .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask + .quad 0x1c6e41596, 0x154442bd4 # R2, R1 + .quad 0x0ccaa009e, 0x1751997d0 # R4, R3 + .octa 0x163cd6124 # R5 + .octa 0x1F7011641 # u' + .octa 0x1DB710641 # P'(x) << 1 + +.Lconstants_CRC_32C_LE: + .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask + .quad 0x09e4addf8, 0x740eef02 # R2, R1 + .quad 0x14cd00bd6, 0xf20c0dfe # R4, R3 + .octa 0x0dd45aab8 # R5 + .octa 0x0dea713f1 # u' + .octa 0x105ec76f0 # P'(x) << 1 + +.previous + + +.text + +/* + * The CRC-32 functions use these calling conventions: + * + * Parameters: + * + * %r2: Initial CRC value, typically ~0; and final CRC (return) value. + * %r3: Input buffer pointer, performance might be improved if the + * buffer is on a doubleword boundary. + * %r4: Length of the buffer, must be 64 bytes or greater. + * + * Register usage: + * + * %r5: CRC-32 constant pool base pointer. + * V0: Initial CRC value and intermediate constants and results. + * V1..V4: Data for CRC computation. + * V5..V8: Next data chunks that are fetched from the input buffer. + * V9: Constant for BE->LE conversion and shift operations + * + * V10..V14: CRC-32 constants. + */ + +ENTRY(crc32_le_vgfm_16) + larl %r5,.Lconstants_CRC_32_LE + j crc32_le_vgfm_generic + +ENTRY(crc32c_le_vgfm_16) + larl %r5,.Lconstants_CRC_32C_LE + j crc32_le_vgfm_generic + + +crc32_le_vgfm_generic: + /* Load CRC-32 constants */ + VLM CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5 + + /* + * Load the initial CRC value. + * + * The CRC value is loaded into the rightmost word of the + * vector register and is later XORed with the LSB portion + * of the loaded input data. 
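+ *
+ * (Rough intuition: the BE->LE VPERM below byte-reverses each
+ * 16-byte chunk, so the bitreflected input is presented to the
+ * GF(2) multiplies in a uniform bit order, and the CRC in the
+ * rightmost word of V0 lines up with the first four data bytes.)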
+ */
+ VZERO %v0 /* Clear V0 */
+ VLVGF %v0,%r2,3 /* Load CRC into rightmost word */
+
+ /* Load a 64-byte data chunk and XOR with CRC */
+ VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */
+ VPERM %v1,%v1,%v1,CONST_PERM_LE2BE
+ VPERM %v2,%v2,%v2,CONST_PERM_LE2BE
+ VPERM %v3,%v3,%v3,CONST_PERM_LE2BE
+ VPERM %v4,%v4,%v4,CONST_PERM_LE2BE
+
+ VX %v1,%v0,%v1 /* V1 ^= CRC */
+ aghi %r3,64 /* BUF = BUF + 64 */
+ aghi %r4,-64 /* LEN = LEN - 64 */
+
+ cghi %r4,64
+ jl .Lless_than_64bytes
+
+.Lfold_64bytes_loop:
+ /* Load the next 64-byte data chunk into V5 to V8 */
+ VLM %v5,%v8,0,%r3
+ VPERM %v5,%v5,%v5,CONST_PERM_LE2BE
+ VPERM %v6,%v6,%v6,CONST_PERM_LE2BE
+ VPERM %v7,%v7,%v7,CONST_PERM_LE2BE
+ VPERM %v8,%v8,%v8,CONST_PERM_LE2BE
+
+ /*
+ * Perform a GF(2) multiplication of the doublewords in V1 with
+ * the R1 and R2 reduction constants in V0. The intermediate result
+ * is then folded (accumulated) with the next data chunk in V5 and
+ * stored in V1. Repeat this step for the register contents
+ * in V2, V3, and V4 respectively.
+ */
+ VGFMAG %v1,CONST_R2R1,%v1,%v5
+ VGFMAG %v2,CONST_R2R1,%v2,%v6
+ VGFMAG %v3,CONST_R2R1,%v3,%v7
+ VGFMAG %v4,CONST_R2R1,%v4,%v8
+
+ aghi %r3,64 /* BUF = BUF + 64 */
+ aghi %r4,-64 /* LEN = LEN - 64 */
+
+ cghi %r4,64
+ jnl .Lfold_64bytes_loop
+
+.Lless_than_64bytes:
+ /*
+ * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3
+ * and R4 and accumulate the next 128-bit chunk until a single 128-bit
+ * value remains.
+ */
+ VGFMAG %v1,CONST_R4R3,%v1,%v2
+ VGFMAG %v1,CONST_R4R3,%v1,%v3
+ VGFMAG %v1,CONST_R4R3,%v1,%v4
+
+ cghi %r4,16
+ jl .Lfinal_fold
+
+.Lfold_16bytes_loop:
+
+ VL %v2,0,,%r3 /* Load next data chunk */
+ VPERM %v2,%v2,%v2,CONST_PERM_LE2BE
+ VGFMAG %v1,CONST_R4R3,%v1,%v2 /* Fold next data chunk */
+
+ aghi %r3,16
+ aghi %r4,-16
+
+ cghi %r4,16
+ jnl .Lfold_16bytes_loop
+
+.Lfinal_fold:
+ /*
+ * Set up a vector register for byte shifts. The shift value must
+ * be loaded in bits 1-4 in byte element 7 of a vector register.
+ * Shift by 8 bytes: 0x40
+ * Shift by 4 bytes: 0x20
+ */
+ VLEIB %v9,0x40,7
+
+ /*
+ * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
+ * to move R4 into the rightmost doubleword and set the leftmost
+ * doubleword to 0x1.
+ */
+ VSRLB %v0,CONST_R4R3,%v9
+ VLEIG %v0,1,0
+
+ /*
+ * Compute GF(2) product of V1 and V0. The rightmost doubleword
+ * of V1 is multiplied with R4. The leftmost doubleword of V1 is
+ * multiplied by 0x1 and is then XORed with the rightmost product.
+ * Implicitly, the intermediate leftmost product becomes padded with
+ * zeros.
+ */
+ VGFMG %v1,%v0,%v1
+
+ /*
+ * Now do the final 32-bit fold by multiplying the rightmost word
+ * in V1 with R5 and XOR the result with the remaining bits in V1.
+ *
+ * To achieve this with a single VGFMAG, right shift V1 by a word
+ * and store the result in V2 which is then accumulated. Use the
+ * vector unpack instruction to load the rightmost half of the
+ * doubleword into the rightmost doubleword element of V1; the other
+ * half is loaded in the leftmost doubleword.
+ * The vector register with CONST_R5 contains the R5 constant in the
+ * rightmost doubleword and the leftmost doubleword is zero to ignore
+ * the leftmost product of V1.
+ */
+ VLEIB %v9,0x20,7 /* Shift by words */
+ VSRLB %v2,%v1,%v9 /* Store remaining bits in V2 */
+ VUPLLF %v1,%v1 /* Split rightmost doubleword */
+ VGFMAG %v1,CONST_R5,%v1,%v2 /* V1 = (V1 * R5) XOR V2 */
+
+ /*
+ * Apply a Barret reduction to compute the final 32-bit CRC value.
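+ * (For intuition: a GF(2) multiplication is carry-less, e.g.
+ * 0b101 * 0b110 = (0b101 << 2) XOR (0b101 << 1) = 0b11110,
+ * so individual bit positions never interact through carries.)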
+ * + * The input values to the Barret reduction are the degree-63 polynomial + * in V1 (R(x)), degree-32 generator polynomial, and the reduction + * constant u. The Barret reduction result is the CRC value of R(x) mod + * P(x). + * + * The Barret reduction algorithm is defined as: + * + * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u + * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) + * 3. C(x) = R(x) XOR T2(x) mod x^32 + * + * Note: The leftmost doubleword of vector register containing + * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product + * is zero and does not contribute to the final result. + */ + + /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ + VUPLLF %v2,%v1 + VGFMG %v2,CONST_RU_POLY,%v2 + + /* + * Compute the GF(2) product of the CRC polynomial with T1(x) in + * V2 and XOR the intermediate result, T2(x), with the value in V1. + * The final result is stored in word element 2 of V2. + */ + VUPLLF %v2,%v2 + VGFMAG %v2,CONST_CRC_POLY,%v2,%v1 + +.Ldone: + VLGVF %r2,%v2,2 + br %r14 + +.previous diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 3f571ea89509..ccccebeeaaf6 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -225,12 +225,16 @@ CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_ANSI_CPRNG=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_CRYPTO_USER_API_RNG=m CONFIG_ZCRYPT=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_CRC7=m # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_POWERPC is not set diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 045035796ca7..67d43a0eabb4 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -337,25 +337,27 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) /* Diagnose 204 functions */ -static inline int __diag204(unsigned long subcode, unsigned long size, void *addr) +static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr) { - register unsigned long _subcode asm("0") = subcode; + register unsigned long _subcode asm("0") = *subcode; register unsigned long _size asm("1") = size; asm volatile( " diag %2,%0,0x204\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); - if (_subcode) - return -1; + *subcode = _subcode; return _size; } static int diag204(unsigned long subcode, unsigned long size, void *addr) { diag_stat_inc(DIAG_STAT_X204); - return __diag204(subcode, size, addr); + size = __diag204(&subcode, size, addr); + if (subcode) + return -1; + return size; } /* diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 44feac38ccfc..012919d9833b 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -70,7 +70,7 @@ static int diag2fc(int size, char* query, void *addr) diag_stat_inc(DIAG_STAT_X2FC); asm volatile( " diag %0,%1,0x2fc\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : "=d" (residual_cnt), "+d" (rc) : "0" (&parm_list) : "memory"); diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 911064aa59b2..d28cc2f5b7b2 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -93,6 +93,11 @@ static inline int atomic_add_return(int i, atomic_t *v) return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i; } +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + return __ATOMIC_LOOP(v, i, 
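/* barrier variant: returns the value of the counter from before the add */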
__ATOMIC_ADD, __ATOMIC_BARRIER); +} + static inline void atomic_add(int i, atomic_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -114,22 +119,27 @@ static inline void atomic_add(int i, atomic_t *v) #define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) #define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v) #define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v) +#define atomic_fetch_sub(_i, _v) atomic_fetch_add(-(int)(_i), _v) #define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0) #define atomic_dec(_v) atomic_sub(1, _v) #define atomic_dec_return(_v) atomic_sub_return(1, _v) #define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0) -#define ATOMIC_OP(op, OP) \ +#define ATOMIC_OPS(op, OP) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER); \ } -ATOMIC_OP(and, AND) -ATOMIC_OP(or, OR) -ATOMIC_OP(xor, XOR) +ATOMIC_OPS(and, AND) +ATOMIC_OPS(or, OR) +ATOMIC_OPS(xor, XOR) -#undef ATOMIC_OP +#undef ATOMIC_OPS #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) @@ -236,6 +246,11 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i; } +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER); +} + static inline void atomic64_add(long long i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -264,17 +279,21 @@ static inline long long atomic64_cmpxchg(atomic64_t *v, return old; } -#define ATOMIC64_OP(op, OP) \ +#define ATOMIC64_OPS(op, OP) \ static inline void atomic64_##op(long i, atomic64_t *v) \ { \ __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER); \ +} \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_BARRIER); \ } -ATOMIC64_OP(and, AND) -ATOMIC64_OP(or, OR) -ATOMIC64_OP(xor, XOR) +ATOMIC64_OPS(and, AND) +ATOMIC64_OPS(or, OR) +ATOMIC64_OPS(xor, XOR) -#undef ATOMIC64_OP +#undef ATOMIC64_OPS #undef __ATOMIC64_LOOP static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u) @@ -315,6 +334,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) #define atomic64_inc_return(_v) atomic64_add_return(1, _v) #define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) #define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v) +#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long long)(_i), _v) #define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v) #define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) #define atomic64_dec(_v) atomic64_sub(1, _v) diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h index 22da3b34c655..05219a5e0b2f 100644 --- a/arch/s390/include/asm/cache.h +++ b/arch/s390/include/asm/cache.h @@ -13,9 +13,6 @@ #define L1_CACHE_SHIFT 8 #define NET_SKB_PAD 32 -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) - -/* Read-only memory is marked before mark_rodata_ro() is called. 
*/ -#define __ro_after_init __read_mostly +#define __read_mostly __section(.data..read_mostly) #endif diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index d1e7b0a0feeb..f7ed88cc066e 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -320,7 +320,7 @@ struct cio_iplinfo { extern int cio_get_iplinfo(struct cio_iplinfo *iplinfo); /* Function from drivers/s390/cio/chsc.c */ -int chsc_sstpc(void *page, unsigned int op, u16 ctrl); +int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); int chsc_sstpi(void *page, void *result, size_t size); #endif diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 9dd04b9e9782..03516476127b 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -169,16 +169,27 @@ static inline int lcctl(u64 ctl) } /* Extract CPU counter */ -static inline int ecctr(u64 ctr, u64 *val) +static inline int __ecctr(u64 ctr, u64 *content) { - register u64 content asm("4") = 0; + register u64 _content asm("4") = 0; int cc; asm volatile ( " .insn rre,0xb2e40000,%0,%2\n" " ipm %1\n" " srl %1,28\n" - : "=d" (content), "=d" (cc) : "d" (ctr) : "cc"); + : "=d" (_content), "=d" (cc) : "d" (ctr) : "cc"); + *content = _content; + return cc; +} + +/* Extract CPU counter */ +static inline int ecctr(u64 ctr, u64 *val) +{ + u64 content; + int cc; + + cc = __ecctr(ctr, &content); if (!cc) *val = content; return cc; diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 5fac921c1c42..86cae09e076a 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -49,7 +49,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) diag_stat_inc(DIAG_STAT_X010); asm volatile( "0: diag %0,%1,0x10\n" - "1:\n" + "1: nopr %%r7\n" EX_TABLE(0b, 1b) EX_TABLE(1b, 1b) : : "a" (start_addr), "a" (end_addr)); diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h deleted file mode 100644 index 105f90e63a0e..000000000000 --- a/arch/s390/include/asm/etr.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright IBM Corp. 
2006 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#ifndef __S390_ETR_H -#define __S390_ETR_H - -/* ETR attachment control register */ -struct etr_eacr { - unsigned int e0 : 1; /* port 0 stepping control */ - unsigned int e1 : 1; /* port 1 stepping control */ - unsigned int _pad0 : 5; /* must be 00100 */ - unsigned int dp : 1; /* data port control */ - unsigned int p0 : 1; /* port 0 change recognition control */ - unsigned int p1 : 1; /* port 1 change recognition control */ - unsigned int _pad1 : 3; /* must be 000 */ - unsigned int ea : 1; /* ETR alert control */ - unsigned int es : 1; /* ETR sync check control */ - unsigned int sl : 1; /* switch to local control */ -} __attribute__ ((packed)); - -/* Port state returned by steai */ -enum etr_psc { - etr_psc_operational = 0, - etr_psc_semi_operational = 1, - etr_psc_protocol_error = 4, - etr_psc_no_symbols = 8, - etr_psc_no_signal = 12, - etr_psc_pps_mode = 13 -}; - -/* Logical port state returned by stetr */ -enum etr_lpsc { - etr_lpsc_operational_step = 0, - etr_lpsc_operational_alt = 1, - etr_lpsc_semi_operational = 2, - etr_lpsc_protocol_error = 4, - etr_lpsc_no_symbol_sync = 8, - etr_lpsc_no_signal = 12, - etr_lpsc_pps_mode = 13 -}; - -/* ETR status words */ -struct etr_esw { - struct etr_eacr eacr; /* attachment control register */ - unsigned int y : 1; /* stepping mode */ - unsigned int _pad0 : 5; /* must be 00000 */ - unsigned int p : 1; /* stepping port number */ - unsigned int q : 1; /* data port number */ - unsigned int psc0 : 4; /* port 0 state code */ - unsigned int psc1 : 4; /* port 1 state code */ -} __attribute__ ((packed)); - -/* Second level data register status word */ -struct etr_slsw { - unsigned int vv1 : 1; /* copy of validity bit data frame 1 */ - unsigned int vv2 : 1; /* copy of validity bit data frame 2 */ - unsigned int vv3 : 1; /* copy of validity bit data frame 3 */ - unsigned int vv4 : 1; /* copy of validity bit data frame 4 */ - unsigned int _pad0 : 19; /* must by all zeroes */ - unsigned int n : 1; /* EAF port number */ - unsigned int v1 : 1; /* validity bit ETR data frame 1 */ - unsigned int v2 : 1; /* validity bit ETR data frame 2 */ - unsigned int v3 : 1; /* validity bit ETR data frame 3 */ - unsigned int v4 : 1; /* validity bit ETR data frame 4 */ - unsigned int _pad1 : 4; /* must be 0000 */ -} __attribute__ ((packed)); - -/* ETR data frames */ -struct etr_edf1 { - unsigned int u : 1; /* untuned bit */ - unsigned int _pad0 : 1; /* must be 0 */ - unsigned int r : 1; /* service request bit */ - unsigned int _pad1 : 4; /* must be 0000 */ - unsigned int a : 1; /* time adjustment bit */ - unsigned int net_id : 8; /* ETR network id */ - unsigned int etr_id : 8; /* id of ETR which sends data frames */ - unsigned int etr_pn : 8; /* port number of ETR output port */ -} __attribute__ ((packed)); - -struct etr_edf2 { - unsigned int etv : 32; /* Upper 32 bits of TOD. 
*/ -} __attribute__ ((packed)); - -struct etr_edf3 { - unsigned int rc : 8; /* failure reason code */ - unsigned int _pad0 : 3; /* must be 000 */ - unsigned int c : 1; /* ETR coupled bit */ - unsigned int tc : 4; /* ETR type code */ - unsigned int blto : 8; /* biased local time offset */ - /* (blto - 128) * 15 = minutes */ - unsigned int buo : 8; /* biased utc offset */ - /* (buo - 128) = leap seconds */ -} __attribute__ ((packed)); - -struct etr_edf4 { - unsigned int ed : 8; /* ETS device dependent data */ - unsigned int _pad0 : 1; /* must be 0 */ - unsigned int buc : 5; /* biased ut1 correction */ - /* (buc - 16) * 0.1 seconds */ - unsigned int em : 6; /* ETS error magnitude */ - unsigned int dc : 6; /* ETS drift code */ - unsigned int sc : 6; /* ETS steering code */ -} __attribute__ ((packed)); - -/* - * ETR attachment information block, two formats - * format 1 has 4 reserved words with a size of 64 bytes - * format 2 has 16 reserved words with a size of 96 bytes - */ -struct etr_aib { - struct etr_esw esw; - struct etr_slsw slsw; - unsigned long long tsp; - struct etr_edf1 edf1; - struct etr_edf2 edf2; - struct etr_edf3 edf3; - struct etr_edf4 edf4; - unsigned int reserved[16]; -} __attribute__ ((packed,aligned(8))); - -/* ETR interruption parameter */ -struct etr_irq_parm { - unsigned int _pad0 : 8; - unsigned int pc0 : 1; /* port 0 state change */ - unsigned int pc1 : 1; /* port 1 state change */ - unsigned int _pad1 : 3; - unsigned int eai : 1; /* ETR alert indication */ - unsigned int _pad2 : 18; -} __attribute__ ((packed)); - -/* Query TOD offset result */ -struct etr_ptff_qto { - unsigned long long physical_clock; - unsigned long long tod_offset; - unsigned long long logical_tod_offset; - unsigned long long tod_epoch_difference; -} __attribute__ ((packed)); - -/* Inline assembly helper functions */ -static inline int etr_setr(struct etr_eacr *ctrl) -{ - int rc = -EOPNOTSUPP; - - asm volatile( - " .insn s,0xb2160000,%1\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "Q" (*ctrl)); - return rc; -} - -/* Stores a format 1 aib with 64 bytes */ -static inline int etr_stetr(struct etr_aib *aib) -{ - int rc = -EOPNOTSUPP; - - asm volatile( - " .insn s,0xb2170000,%1\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "Q" (*aib)); - return rc; -} - -/* Stores a format 2 aib with 96 bytes for specified port */ -static inline int etr_steai(struct etr_aib *aib, unsigned int func) -{ - register unsigned int reg0 asm("0") = func; - int rc = -EOPNOTSUPP; - - asm volatile( - " .insn s,0xb2b30000,%1\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "Q" (*aib), "d" (reg0)); - return rc; -} - -/* Function codes for the steai instruction. */ -#define ETR_STEAI_STEPPING_PORT 0x10 -#define ETR_STEAI_ALTERNATE_PORT 0x11 -#define ETR_STEAI_PORT_0 0x12 -#define ETR_STEAI_PORT_1 0x13 - -static inline int etr_ptff(void *ptff_block, unsigned int func) -{ - register unsigned int reg0 asm("0") = func; - register unsigned long reg1 asm("1") = (unsigned long) ptff_block; - int rc = -EOPNOTSUPP; - - asm volatile( - " .word 0x0104\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (rc), "=m" (ptff_block) - : "d" (reg0), "d" (reg1), "m" (ptff_block) : "cc"); - return rc; -} - -/* Function codes for the ptff instruction. 
*/
-#define ETR_PTFF_QAF 0x00 /* query available functions */
-#define ETR_PTFF_QTO 0x01 /* query tod offset */
-#define ETR_PTFF_QSI 0x02 /* query steering information */
-#define ETR_PTFF_ATO 0x40 /* adjust tod offset */
-#define ETR_PTFF_STO 0x41 /* set tod offset */
-#define ETR_PTFF_SFS 0x42 /* set fine steering rate */
-#define ETR_PTFF_SGS 0x43 /* set gross steering rate */
-
-/* Functions needed by the machine check handler */
-int etr_switch_to_local(void);
-int etr_sync_check(void);
-void etr_queue_work(void);
-
-/* notifier for syncs */
-extern struct atomic_notifier_head s390_epoch_delta_notifier;
-
-/* STP interruption parameter */
-struct stp_irq_parm {
- unsigned int _pad0 : 14;
- unsigned int tsc : 1; /* Timing status change */
- unsigned int lac : 1; /* Link availability change */
- unsigned int tcpc : 1; /* Time control parameter change */
- unsigned int _pad2 : 15;
-} __attribute__ ((packed));
-
-#define STP_OP_SYNC 1
-#define STP_OP_CTRL 3
-
-struct stp_sstpi {
- unsigned int rsvd0;
- unsigned int rsvd1 : 8;
- unsigned int stratum : 8;
- unsigned int vbits : 16;
- unsigned int leaps : 16;
- unsigned int tmd : 4;
- unsigned int ctn : 4;
- unsigned int rsvd2 : 3;
- unsigned int c : 1;
- unsigned int tst : 4;
- unsigned int tzo : 16;
- unsigned int dsto : 16;
- unsigned int ctrl : 16;
- unsigned int rsvd3 : 16;
- unsigned int tto;
- unsigned int rsvd4;
- unsigned int ctnid[3];
- unsigned int rsvd5;
- unsigned int todoff[4];
- unsigned int rsvd6[48];
-} __attribute__ ((packed));
-
-/* Functions needed by the machine check handler */
-int stp_sync_check(void);
-int stp_island_check(void);
-void stp_queue_work(void);
-
-#endif /* __S390_ETR_H */
diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h
index 7ecb92b469b6..04cb4b4bcc5f 100644
--- a/arch/s390/include/asm/fcx.h
+++ b/arch/s390/include/asm/fcx.h
@@ -6,7 +6,7 @@
 */
#ifndef _ASM_S390_FCX_H
-#define _ASM_S390_FCX_H _ASM_S390_FCX_H
+#define _ASM_S390_FCX_H
#include
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 5e04f3cbd320..6aba6fc406ad 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -1,6 +1,41 @@
/*
 * In-kernel FPU support functions
 *
+ *
+ * Consider these guidelines before using in-kernel FPU functions:
+ *
+ * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel
+ * use of floating-point or vector registers and instructions.
+ *
+ * 2. For kernel_fpu_begin(), specify the vector register range you want to
+ * use with the KERNEL_VXR_* constants. Consider these usage guidelines:
+ *
+ * a) If your function typically runs in process context, use the lower
+ * half of the vector registers, for example, specify KERNEL_VXR_LOW.
+ * b) If your function typically runs in soft-irq or hard-irq context,
+ * prefer using the upper half of the vector registers, for example,
+ * specify KERNEL_VXR_HIGH.
+ *
+ * If you adhere to these guidelines, an interrupted process context
+ * does not need to save and restore vector registers because of
+ * disjoint register ranges.
+ *
+ * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions
+ * include logic to save and restore up to 16 vector registers at once.
+ *
+ * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different
+ * struct kernel_fpu states. Vector registers that are in use by outer
+ * levels are saved and restored. You can minimize the save and restore
+ * effort by choosing disjoint vector register ranges.
+ *
+ * 4.
To use vector floating-point instructions, specify the KERNEL_FPC
+ * flag to save and restore floating-point controls in addition to any
+ * vector register range.
+ *
+ * 5. To use floating-point registers and instructions only, specify the
+ * KERNEL_FPR flag. This flag triggers a save and restore of vector
+ * registers V0 to V15 and floating-point controls.
+ *
 * Copyright IBM Corp. 2015
 * Author(s): Hendrik Brueckner
 */
@@ -8,6 +43,8 @@
#ifndef _ASM_S390_FPU_API_H
#define _ASM_S390_FPU_API_H
+#include
+
void save_fpu_regs(void);
static inline int test_fp_ctl(u32 fpc)
@@ -22,9 +59,47 @@ static inline int test_fp_ctl(u32 fpc)
 " la %0,0\n"
 "1:\n"
 EX_TABLE(0b,1b)
- : "=d" (rc), "=d" (orig_fpc)
+ : "=d" (rc), "=&d" (orig_fpc)
 : "d" (fpc), "0" (-EINVAL));
 return rc;
}
+#define KERNEL_VXR_V0V7 1
+#define KERNEL_VXR_V8V15 2
+#define KERNEL_VXR_V16V23 4
+#define KERNEL_VXR_V24V31 8
+#define KERNEL_FPR 16
+#define KERNEL_FPC 256
+
+#define KERNEL_VXR_LOW (KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
+#define KERNEL_VXR_MID (KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
+#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
+
+#define KERNEL_FPU_MASK (KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR)
+
+struct kernel_fpu;
+
+/*
+ * Note that the functions below must be called with preemption disabled.
+ * Do not enable preemption before calling __kernel_fpu_end() to prevent
+ * corruption of an existing kernel FPU state.
+ *
+ * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
+ */
+void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
+void __kernel_fpu_end(struct kernel_fpu *state);
+
+
+static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
+{
+ preempt_disable();
+ __kernel_fpu_begin(state, flags);
+}
+
+static inline void kernel_fpu_end(struct kernel_fpu *state)
+{
+ __kernel_fpu_end(state);
+ preempt_enable();
+}
+
 #endif /* _ASM_S390_FPU_API_H */
diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h
index fe937c9b6471..bce255ead72b 100644
--- a/arch/s390/include/asm/fpu/types.h
+++ b/arch/s390/include/asm/fpu/types.h
@@ -24,4 +24,14 @@ struct fpu {
/* VX array structure for address operand constraints in inline assemblies */
struct vx_array { __vector128 _[__NUM_VXRS]; };
+/* In-kernel FPU state structure */
+struct kernel_fpu {
+ u32 mask;
+ u32 fpc;
+ union {
+ freg_t fprs[__NUM_FPRS];
+ __vector128 vxrs[__NUM_VXRS];
+ };
+};
+
 #endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index d9be7c0c1291..4c7fac75090e 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -41,7 +41,10 @@ static inline int prepare_hugepage_range(struct file *file,
static inline void huge_pte_clear(struct mm_struct *mm,
 unsigned long addr, pte_t *ptep)
{
- pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
+ if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+ pte_val(*ptep) = _REGION3_ENTRY_EMPTY;
+ else
+ pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
}
static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 6fc44dca193e..4da22b2f0521 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -141,11 +141,11 @@ extern void setup_ipl(void);
 * DIAG 308 support
 */
enum diag308_subcode {
- DIAG308_REL_HSA = 2,
- DIAG308_IPL = 3,
- DIAG308_DUMP = 4,
- DIAG308_SET = 5,
- DIAG308_STORE = 6,
+ DIAG308_REL_HSA = 2,
+ DIAG308_LOAD_CLEAR = 3,
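+ /* subcodes 3 and 4 keep their values; only the names change, from
+ * DIAG308_IPL and DIAG308_DUMP to names that spell out the load-clear
+ * and load-normal-dump semantics */
+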
DIAG308_LOAD_NORMAL_DUMP = 4, + DIAG308_SET = 5, + DIAG308_STORE = 6, }; enum diag308_ipl_type { diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index f97b055de76a..70c9bce766f5 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -7,11 +7,8 @@ #define NR_IRQS_BASE 3 -#ifdef CONFIG_PCI_NR_MSI -# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI) -#else -# define NR_IRQS NR_IRQS_BASE -#endif +#define NR_IRQS NR_IRQS_BASE +#define NR_IRQS_LEGACY NR_IRQS_BASE /* External interruption codes */ #define EXT_IRQ_INTERRUPT_KEY 0x0040 diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 7f9fd5e3f1bf..9be198f5ee79 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ #include +#include #define JUMP_LABEL_NOP_SIZE 6 #define JUMP_LABEL_NOP_OFFSET 2 diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index b47ad3b642cc..591e5a5279b0 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -43,9 +43,9 @@ typedef u16 kprobe_opcode_t; #define MAX_INSN_SIZE 0x0003 #define MAX_STACK_SIZE 64 #define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \ - (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \ + (((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR))) \ ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) + : (((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR))) #define kretprobe_blacklist_size 0 diff --git a/arch/s390/include/asm/mathemu.h b/arch/s390/include/asm/mathemu.h deleted file mode 100644 index 614dfaf47f71..000000000000 --- a/arch/s390/include/asm/mathemu.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * IEEE floating point emulation. - * - * S390 version - * Copyright IBM Corp. 
1999 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ - -#ifndef __MATHEMU__ -#define __MATHEMU__ - -extern int math_emu_b3(__u8 *, struct pt_regs *); -extern int math_emu_ed(__u8 *, struct pt_regs *); -extern int math_emu_ldr(__u8 *); -extern int math_emu_ler(__u8 *); -extern int math_emu_std(__u8 *, struct pt_regs *); -extern int math_emu_ld(__u8 *, struct pt_regs *); -extern int math_emu_ste(__u8 *, struct pt_regs *); -extern int math_emu_le(__u8 *, struct pt_regs *); -extern int math_emu_lfpc(__u8 *, struct pt_regs *); -extern int math_emu_stfpc(__u8 *, struct pt_regs *); -extern int math_emu_srnm(__u8 *, struct pt_regs *); - -#endif /* __MATHEMU__ */ - - - - diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 081b2ad99d73..18226437a832 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -6,7 +6,7 @@ typedef struct { cpumask_t cpu_attach_mask; - atomic_t attach_count; + atomic_t flush_count; unsigned int flush_mm; spinlock_t list_lock; struct list_head pgtable_list; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index c837b79b455d..f77c638bf397 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -19,7 +19,7 @@ static inline int init_new_context(struct task_struct *tsk, INIT_LIST_HEAD(&mm->context.pgtable_list); INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); - atomic_set(&mm->context.attach_count, 0); + atomic_set(&mm->context.flush_count, 0); mm->context.flush_mm = 0; #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste; @@ -90,15 +90,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, S390_lowcore.user_asce = next->context.asce; if (prev == next) return; - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); + cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); + cpumask_set_cpu(cpu, mm_cpumask(next)); /* Clear old ASCE by loading the kernel ASCE. 
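 * On s390, control register 1 holds the primary ASCE and control
 * register 7 the secondary one; both are pointed at the kernel ASCE
 * here until set_user_asce() installs the new mm's ASCE.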
*/ __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); - atomic_inc(&next->context.attach_count); - atomic_dec(&prev->context.attach_count); - if (MACHINE_HAS_TLB_LC) - cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); + cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } #define finish_arch_post_lock_switch finish_arch_post_lock_switch @@ -110,10 +107,9 @@ static inline void finish_arch_post_lock_switch(void) load_kernel_asce(); if (mm) { preempt_disable(); - while (atomic_read(&mm->context.attach_count) >> 16) + while (atomic_read(&mm->context.flush_count)) cpu_relax(); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); if (mm->context.flush_mm) __tlb_flush_mm(mm); preempt_enable(); @@ -128,7 +124,6 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { switch_mm(prev, next, current); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); set_user_asce(next); } diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 53eacbd4f09b..b2146c4119b2 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -21,6 +21,7 @@ #define HPAGE_SIZE (1UL << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#define HUGE_MAX_HSTATE 2 #define ARCH_HAS_SETCLEAR_HUGE_PTE #define ARCH_HAS_HUGE_PTE_TYPE @@ -30,11 +31,12 @@ #include #ifndef __ASSEMBLY__ +void __storage_key_init_range(unsigned long start, unsigned long end); + static inline void storage_key_init_range(unsigned long start, unsigned long end) { -#if PAGE_DEFAULT_KEY - __storage_key_init_range(start, end); -#endif + if (PAGE_DEFAULT_KEY) + __storage_key_init_range(start, end); } #define clear_page(page) memset((page), 0, PAGE_SIZE) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 1f7ff85c5e4c..c64c0befd3f3 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -86,16 +86,4 @@ struct sf_raw_sample { u8 padding[]; /* Padding to next multiple of 8 */ } __packed; -/* Perf hardware reserve and release functions */ -#ifdef CONFIG_PERF_EVENTS -int perf_reserve_sampling(void); -void perf_release_sampling(void); -#else /* CONFIG_PERF_EVENTS */ -static inline int perf_reserve_sampling(void) -{ - return 0; -} -static inline void perf_release_sampling(void) {} -#endif /* CONFIG_PERF_EVENTS */ - #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 18d2beb89340..ea1533e07271 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -28,12 +28,33 @@ #include #include #include +#include #include #include -extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); +extern pgd_t swapper_pg_dir[]; extern void paging_init(void); extern void vmem_map_init(void); +pmd_t *vmem_pmd_alloc(void); +pte_t *vmem_pte_alloc(void); + +enum { + PG_DIRECT_MAP_4K = 0, + PG_DIRECT_MAP_1M, + PG_DIRECT_MAP_2G, + PG_DIRECT_MAP_MAX +}; + +extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; + +static inline void update_page_count(int level, long count) +{ + if (IS_ENABLED(CONFIG_PROC_FS)) + atomic_long_add(count, &direct_pages_count[level]); +} + +struct seq_file; +void arch_report_meminfo(struct seq_file *m); /* * The S390 doesn't have any external MMU info: the kernel page @@ -270,8 +291,23 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) #define 
_REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) -#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ -#define _REGION3_ENTRY_RO 0x200 /* page protection bit */ +#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */ +#define _REGION3_ENTRY_ORIGIN ~0x7ffUL/* region third table origin */ + +#define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */ +#define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */ +#define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */ +#define _REGION3_ENTRY_READ 0x0002 /* SW region read bit */ +#define _REGION3_ENTRY_WRITE 0x0001 /* SW region write bit */ + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */ +#else +#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */ +#endif + +#define _REGION_ENTRY_BITS 0xfffffffffffff227UL +#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL @@ -297,7 +333,8 @@ static inline int is_module_addr(void *addr) #endif /* - * Segment table entry encoding (R = read-only, I = invalid, y = young bit): + * Segment table and region3 table entry encoding + * (R = read-only, I = invalid, y = young bit): * dy..R...I...rw * prot-none, clean, old 00..1...1...00 * prot-none, clean, young 01..1...1...00 @@ -391,6 +428,33 @@ static inline int is_module_addr(void *addr) _SEGMENT_ENTRY_READ) #define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) +#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ + _SEGMENT_ENTRY_LARGE | \ + _SEGMENT_ENTRY_READ | \ + _SEGMENT_ENTRY_WRITE | \ + _SEGMENT_ENTRY_YOUNG | \ + _SEGMENT_ENTRY_DIRTY) +#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \ + _SEGMENT_ENTRY_LARGE | \ + _SEGMENT_ENTRY_READ | \ + _SEGMENT_ENTRY_YOUNG | \ + _SEGMENT_ENTRY_PROTECT) + +/* + * Region3 entry (large page) protection definitions. 
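+ *
+ * These mirror the SEGMENT_KERNEL* definitions one translation level
+ * up: a large segment entry maps 1 MB while a large region-third entry
+ * maps 2 GB, hence the separate REGION3_* software dirty/young bits.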
+ */ + +#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_WRITE | \ + _REGION3_ENTRY_YOUNG | \ + _REGION3_ENTRY_DIRTY) +#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_YOUNG | \ + _REGION_ENTRY_PROTECT) static inline int mm_has_pgste(struct mm_struct *mm) { @@ -424,6 +488,53 @@ static inline int mm_use_skey(struct mm_struct *mm) return 0; } +static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new) +{ + register unsigned long reg2 asm("2") = old; + register unsigned long reg3 asm("3") = new; + unsigned long address = (unsigned long)ptr | 1; + + asm volatile( + " csp %0,%3" + : "+d" (reg2), "+m" (*ptr) + : "d" (reg3), "d" (address) + : "cc"); +} + +static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new) +{ + register unsigned long reg2 asm("2") = old; + register unsigned long reg3 asm("3") = new; + unsigned long address = (unsigned long)ptr | 1; + + asm volatile( + " .insn rre,0xb98a0000,%0,%3" + : "+d" (reg2), "+m" (*ptr) + : "d" (reg3), "d" (address) + : "cc"); +} + +#define CRDTE_DTT_PAGE 0x00UL +#define CRDTE_DTT_SEGMENT 0x10UL +#define CRDTE_DTT_REGION3 0x14UL +#define CRDTE_DTT_REGION2 0x18UL +#define CRDTE_DTT_REGION1 0x1cUL + +static inline void crdte(unsigned long old, unsigned long new, + unsigned long table, unsigned long dtt, + unsigned long address, unsigned long asce) +{ + register unsigned long reg2 asm("2") = old; + register unsigned long reg3 asm("3") = new; + register unsigned long reg4 asm("4") = table | dtt; + register unsigned long reg5 asm("5") = address; + + asm volatile(".insn rrf,0xb98f0000,%0,%2,%4,0" + : "+d" (reg2) + : "d" (reg3), "d" (reg4), "d" (reg5), "a" (asce) + : "memory", "cc"); +} + /* * pgd/pmd/pte query functions */ @@ -465,7 +576,7 @@ static inline int pud_none(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) return 0; - return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL; + return pud_val(pud) == _REGION3_ENTRY_EMPTY; } static inline int pud_large(pud_t pud) @@ -475,17 +586,35 @@ static inline int pud_large(pud_t pud) return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); } +static inline unsigned long pud_pfn(pud_t pud) +{ + unsigned long origin_mask; + + origin_mask = _REGION3_ENTRY_ORIGIN; + if (pud_large(pud)) + origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; + return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; +} + +static inline int pmd_large(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; +} + +static inline int pmd_bad(pmd_t pmd) +{ + if (pmd_large(pmd)) + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; +} + static inline int pud_bad(pud_t pud) { - /* - * With dynamic page table levels the pud can be a region table - * entry or a segment table entry. Check for the bit that are - * invalid for either table entry. 
- */ - unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & - ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; - return (pud_val(pud) & mask) != 0; + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + return pmd_bad(__pmd(pud_val(pud))); + if (pud_large(pud)) + return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0; + return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0; } static inline int pmd_present(pmd_t pmd) @@ -498,11 +627,6 @@ static inline int pmd_none(pmd_t pmd) return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID; } -static inline int pmd_large(pmd_t pmd) -{ - return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; -} - static inline unsigned long pmd_pfn(pmd_t pmd) { unsigned long origin_mask; @@ -513,13 +637,6 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; } -static inline int pmd_bad(pmd_t pmd) -{ - if (pmd_large(pmd)) - return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; - return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; -} - #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { @@ -963,6 +1080,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) +#define pud_page(pud) pfn_to_page(pud_pfn(pud)) /* Find an entry in the lowest level page table.. */ #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) @@ -970,20 +1088,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) -static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) -{ - /* - * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) - * Convert to segment table entry format. - */ - if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) - return pgprot_val(SEGMENT_NONE); - if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) - return pgprot_val(SEGMENT_READ); - return pgprot_val(SEGMENT_WRITE); -} - static inline pmd_t pmd_wrprotect(pmd_t pmd) { pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE; @@ -1020,6 +1124,56 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +static inline pud_t pud_wrprotect(pud_t pud) +{ + pud_val(pud) &= ~_REGION3_ENTRY_WRITE; + pud_val(pud) |= _REGION_ENTRY_PROTECT; + return pud; +} + +static inline pud_t pud_mkwrite(pud_t pud) +{ + pud_val(pud) |= _REGION3_ENTRY_WRITE; + if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY)) + return pud; + pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + return pud; +} + +static inline pud_t pud_mkclean(pud_t pud) +{ + if (pud_large(pud)) { + pud_val(pud) &= ~_REGION3_ENTRY_DIRTY; + pud_val(pud) |= _REGION_ENTRY_PROTECT; + } + return pud; +} + +static inline pud_t pud_mkdirty(pud_t pud) +{ + if (pud_large(pud)) { + pud_val(pud) |= _REGION3_ENTRY_DIRTY | + _REGION3_ENTRY_SOFT_DIRTY; + if (pud_val(pud) & _REGION3_ENTRY_WRITE) + pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + } + return pud; +} + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) +static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) +{ + /* + * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) + * Convert to segment table entry format. 
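+ * The mapping below is: PAGE_NONE -> SEGMENT_NONE, PAGE_READ ->
+ * SEGMENT_READ, and anything else -> SEGMENT_WRITE.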
+ */ + if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) + return pgprot_val(SEGMENT_NONE); + if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) + return pgprot_val(SEGMENT_READ); + return pgprot_val(SEGMENT_WRITE); +} + static inline pmd_t pmd_mkyoung(pmd_t pmd) { if (pmd_large(pmd)) { @@ -1068,15 +1222,8 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) static inline void __pmdp_csp(pmd_t *pmdp) { - register unsigned long reg2 asm("2") = pmd_val(*pmdp); - register unsigned long reg3 asm("3") = pmd_val(*pmdp) | - _SEGMENT_ENTRY_INVALID; - register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; - - asm volatile( - " csp %1,%3" - : "=m" (*pmdp) - : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); + csp((unsigned int *)pmdp + 1, pmd_val(*pmdp), + pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); } static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) @@ -1091,6 +1238,19 @@ static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) : "cc" ); } +static inline void __pudp_idte(unsigned long address, pud_t *pudp) +{ + unsigned long r3o; + + r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); + r3o |= _ASCE_TYPE_REGION3; + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,0" + : "=m" (*pudp) + : "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK)) + : "cc"); +} + static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) { unsigned long sto; @@ -1103,8 +1263,22 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) : "cc" ); } +static inline void __pudp_idte_local(unsigned long address, pud_t *pudp) +{ + unsigned long r3o; + + r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); + r3o |= _ASCE_TYPE_REGION3; + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,1" + : "=m" (*pudp) + : "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK)) + : "cc"); +} + pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t); +pud_t pudp_xchg_direct(struct mm_struct *, unsigned long, pud_t *, pud_t); #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 9d4d311d7e52..09529202ea77 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -77,7 +77,10 @@ static inline void get_cpu_id(struct cpuid *ptr) asm volatile("stidp %0" : "=Q" (*ptr)); } -extern void s390_adjust_jiffies(void); +void s390_adjust_jiffies(void); +void s390_update_cpu_mhz(void); +void cpu_detect_mhz_feature(void); + extern const struct seq_operations cpuinfo_op; extern int sysctl_ieee_emulation_warnings; extern void execve_tail(void); @@ -233,6 +236,18 @@ void cpu_relax(void); #define cpu_relax_lowlatency() barrier() +#define ECAG_CACHE_ATTRIBUTE 0 +#define ECAG_CPU_ATTRIBUTE 1 + +static inline unsigned long __ecag(unsigned int asi, unsigned char parm) +{ + unsigned long val; + + asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ + : "=d" (val) : "a" (asi << 8 | parm)); + return val; +} + static inline void psw_set_key(unsigned int key) { asm volatile("spka 0(%0)" : : "d" (key)); diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index c75e4471e618..597e7e96b59e 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -207,41 +207,4 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * implement atomic add functionality - */ -static inline void 
rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); - return new; -} - #endif /* _S390_RWSEM_H */ diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index fbd9116eb17b..5ce29fe100ba 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,5 +4,6 @@ #include extern char _eshared[], _ehead[]; +extern char __start_ro_after_init[], __end_ro_after_init[]; #endif diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index c0f0efbb6ab5..5e8d57e1cc5e 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -86,9 +86,13 @@ extern char vmpoff_cmd[]; #define CONSOLE_IS_SCLP (console_mode == 1) #define CONSOLE_IS_3215 (console_mode == 2) #define CONSOLE_IS_3270 (console_mode == 3) +#define CONSOLE_IS_VT220 (console_mode == 4) +#define CONSOLE_IS_HVC (console_mode == 5) #define SET_CONSOLE_SCLP do { console_mode = 1; } while (0) #define SET_CONSOLE_3215 do { console_mode = 2; } while (0) #define SET_CONSOLE_3270 do { console_mode = 3; } while (0) +#define SET_CONSOLE_VT220 do { console_mode = 4; } while (0) +#define SET_CONSOLE_HVC do { console_mode = 5; } while (0) #define NSS_NAME_SIZE 8 extern char kernel_nss_name[]; diff --git a/arch/s390/include/asm/sfp-machine.h b/arch/s390/include/asm/sfp-machine.h deleted file mode 100644 index 4e16aede4b06..000000000000 --- a/arch/s390/include/asm/sfp-machine.h +++ /dev/null @@ -1,142 +0,0 @@ -/* Machine-dependent software floating-point definitions. - S/390 kernel version. - Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Richard Henderson (rth@cygnus.com), - Jakub Jelinek (jj@ultra.linux.cz), - David S. Miller (davem@redhat.com) and - Peter Maydell (pmaydell@chiark.greenend.org.uk). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If - not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ - -#ifndef _SFP_MACHINE_H -#define _SFP_MACHINE_H - - -#define _FP_W_TYPE_SIZE 32 -#define _FP_W_TYPE unsigned int -#define _FP_WS_TYPE signed int -#define _FP_I_TYPE int - -#define _FP_MUL_MEAT_S(R,X,Y) \ - _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) -#define _FP_MUL_MEAT_D(R,X,Y) \ - _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) -#define _FP_MUL_MEAT_Q(R,X,Y) \ - _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) - -#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y) -#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) -#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) - -#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) -#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 -#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 -#define _FP_NANSIGN_S 0 -#define _FP_NANSIGN_D 0 -#define _FP_NANSIGN_Q 0 - -#define _FP_KEEPNANFRACP 1 - -/* - * If one NaN is signaling and the other is not, - * we choose that one, otherwise we choose X. - */ -#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ - do { \ - if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ - && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ - { \ - R##_s = Y##_s; \ - _FP_FRAC_COPY_##wc(R,Y); \ - } \ - else \ - { \ - R##_s = X##_s; \ - _FP_FRAC_COPY_##wc(R,X); \ - } \ - R##_c = FP_CLS_NAN; \ - } while (0) - -/* Some assembly to speed things up. */ -#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \ - unsigned int __r2 = (x2) + (y2); \ - unsigned int __r1 = (x1); \ - unsigned int __r0 = (x0); \ - asm volatile( \ - " alr %2,%3\n" \ - " brc 12,0f\n" \ - " lhi 0,1\n" \ - " alr %1,0\n" \ - " brc 12,0f\n" \ - " alr %0,0\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \ - : "d" (y0), "i" (1) : "cc", "0" ); \ - asm volatile( \ - " alr %1,%2\n" \ - " brc 12,0f\n" \ - " ahi %0,1\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1) \ - : "d" (y1) : "cc"); \ - (r2) = __r2; \ - (r1) = __r1; \ - (r0) = __r0; \ -}) - -#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \ - unsigned int __r2 = (x2) - (y2); \ - unsigned int __r1 = (x1); \ - unsigned int __r0 = (x0); \ - asm volatile( \ - " slr %2,%3\n" \ - " brc 3,0f\n" \ - " lhi 0,1\n" \ - " slr %1,0\n" \ - " brc 3,0f\n" \ - " slr %0,0\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \ - : "d" (y0) : "cc", "0"); \ - asm volatile( \ - " slr %1,%2\n" \ - " brc 3,0f\n" \ - " ahi %0,-1\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1) \ - : "d" (y1) : "cc"); \ - (r2) = __r2; \ - (r1) = __r1; \ - (r0) = __r0; \ -}) - -#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0) - -/* Obtain the current rounding mode. */ -#define FP_ROUNDMODE mode - -/* Exception flags. 
*/ -#define FP_EX_INVALID 0x800000 -#define FP_EX_DIVZERO 0x400000 -#define FP_EX_OVERFLOW 0x200000 -#define FP_EX_UNDERFLOW 0x100000 -#define FP_EX_INEXACT 0x080000 - -/* We write the results always */ -#define FP_INHIBIT_RESULTS 0 - -#endif diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h deleted file mode 100644 index c8b7cf9d6279..000000000000 --- a/arch/s390/include/asm/sfp-util.h +++ /dev/null @@ -1,67 +0,0 @@ -#include -#include -#include -#include - -#define add_ssaaaa(sh, sl, ah, al, bh, bl) ({ \ - unsigned int __sh = (ah); \ - unsigned int __sl = (al); \ - asm volatile( \ - " alr %1,%3\n" \ - " brc 12,0f\n" \ - " ahi %0,1\n" \ - "0: alr %0,%2" \ - : "+&d" (__sh), "+d" (__sl) \ - : "d" (bh), "d" (bl) : "cc"); \ - (sh) = __sh; \ - (sl) = __sl; \ -}) - -#define sub_ddmmss(sh, sl, ah, al, bh, bl) ({ \ - unsigned int __sh = (ah); \ - unsigned int __sl = (al); \ - asm volatile( \ - " slr %1,%3\n" \ - " brc 3,0f\n" \ - " ahi %0,-1\n" \ - "0: slr %0,%2" \ - : "+&d" (__sh), "+d" (__sl) \ - : "d" (bh), "d" (bl) : "cc"); \ - (sh) = __sh; \ - (sl) = __sl; \ -}) - -/* a umul b = a mul b + (a>=2<<31) ? b<<32:0 + (b>=2<<31) ? a<<32:0 */ -#define umul_ppmm(wh, wl, u, v) ({ \ - unsigned int __wh = u; \ - unsigned int __wl = v; \ - asm volatile( \ - " ltr 1,%0\n" \ - " mr 0,%1\n" \ - " jnm 0f\n" \ - " alr 0,%1\n" \ - "0: ltr %1,%1\n" \ - " jnm 1f\n" \ - " alr 0,%0\n" \ - "1: lr %0,0\n" \ - " lr %1,1\n" \ - : "+d" (__wh), "+d" (__wl) \ - : : "0", "1", "cc"); \ - wh = __wh; \ - wl = __wl; \ -}) - -#define udiv_qrnnd(q, r, n1, n0, d) \ - do { unsigned long __n; \ - unsigned int __r, __d; \ - __n = ((unsigned long)(n1) << 32) + n0; \ - __d = (d); \ - (q) = __n / __d; \ - (r) = __n % __d; \ - } while (0) - -#define UDIV_NEEDS_NORMALIZATION 0 - -#define abort() BUG() - -#define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 1c8f33fca356..72df5f2de6b0 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -37,8 +37,8 @@ #ifndef __ASSEMBLY__ -static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, - u32 *status) +static inline int ____pcpu_sigp(u16 addr, u8 order, unsigned long parm, + u32 *status) { register unsigned long reg1 asm ("1") = parm; int cc; @@ -48,8 +48,19 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, " ipm %0\n" " srl %0,28\n" : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); + *status = reg1; + return cc; +} + +static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, + u32 *status) +{ + u32 _status; + int cc; + + cc = ____pcpu_sigp(addr, order, parm, &_status); if (status && cc == 1) - *status = reg1; + *status = _status; return cc; } diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 63ebf37d3143..7e9e09f600fa 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -10,6 +10,8 @@ #define __ASM_SPINLOCK_H #include +#include +#include #define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval) @@ -97,6 +99,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { while (arch_spin_is_locked(lock)) arch_spin_relax(lock); + smp_acquire__after_ctrl_dep(); } /* diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h new file mode 100644 index 000000000000..7689727585b2 --- /dev/null +++ b/arch/s390/include/asm/stp.h @@ -0,0 +1,51 @@ +/* + * Copyright IBM Corp. 
2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ +#ifndef __S390_STP_H +#define __S390_STP_H + +/* notifier for syncs */ +extern struct atomic_notifier_head s390_epoch_delta_notifier; + +/* STP interruption parameter */ +struct stp_irq_parm { + unsigned int _pad0 : 14; + unsigned int tsc : 1; /* Timing status change */ + unsigned int lac : 1; /* Link availability change */ + unsigned int tcpc : 1; /* Time control parameter change */ + unsigned int _pad2 : 15; +} __attribute__ ((packed)); + +#define STP_OP_SYNC 1 +#define STP_OP_CTRL 3 + +struct stp_sstpi { + unsigned int rsvd0; + unsigned int rsvd1 : 8; + unsigned int stratum : 8; + unsigned int vbits : 16; + unsigned int leaps : 16; + unsigned int tmd : 4; + unsigned int ctn : 4; + unsigned int rsvd2 : 3; + unsigned int c : 1; + unsigned int tst : 4; + unsigned int tzo : 16; + unsigned int dsto : 16; + unsigned int ctrl : 16; + unsigned int rsvd3 : 16; + unsigned int tto; + unsigned int rsvd4; + unsigned int ctnid[3]; + unsigned int rsvd5; + unsigned int todoff[4]; + unsigned int rsvd6[48]; +} __attribute__ ((packed)); + +/* Functions needed by the machine check handler */ +int stp_sync_check(void); +int stp_island_check(void); +void stp_queue_work(void); + +#endif /* __S390_STP_H */ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index dcb6312a0b91..0bb08f341c09 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -52,6 +52,70 @@ static inline void store_clock_comparator(__u64 *time) void clock_comparator_work(void); +void __init ptff_init(void); + +extern unsigned char ptff_function_mask[16]; +extern unsigned long lpar_offset; +extern unsigned long initial_leap_seconds; + +/* Function codes for the ptff instruction. */ +#define PTFF_QAF 0x00 /* query available functions */ +#define PTFF_QTO 0x01 /* query tod offset */ +#define PTFF_QSI 0x02 /* query steering information */ +#define PTFF_QUI 0x04 /* query UTC information */ +#define PTFF_ATO 0x40 /* adjust tod offset */ +#define PTFF_STO 0x41 /* set tod offset */ +#define PTFF_SFS 0x42 /* set fine steering rate */ +#define PTFF_SGS 0x43 /* set gross steering rate */ + +/* Query TOD offset result */ +struct ptff_qto { + unsigned long long physical_clock; + unsigned long long tod_offset; + unsigned long long logical_tod_offset; + unsigned long long tod_epoch_difference; +} __packed; + +static inline int ptff_query(unsigned int nr) +{ + unsigned char *ptr; + + ptr = ptff_function_mask + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; +} + +/* Query UTC information result */ +struct ptff_qui { + unsigned int tm : 2; + unsigned int ts : 2; + unsigned int : 28; + unsigned int pad_0x04; + unsigned long leap_event; + short old_leap; + short new_leap; + unsigned int pad_0x14; + unsigned long prt[5]; + unsigned long cst[3]; + unsigned int skew; + unsigned int pad_0x5c[41]; +} __packed; + +static inline int ptff(void *ptff_block, size_t len, unsigned int func) +{ + typedef struct { char _[len]; } addrtype; + register unsigned int reg0 asm("0") = func; + register unsigned long reg1 asm("1") = (unsigned long) ptff_block; + int rc; + + asm volatile( + " .word 0x0104\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc), "+m" (*(addrtype *) ptff_block) + : "d" (reg0), "d" (reg1) : "cc"); + return rc; +} + static inline unsigned long long local_tick_disable(void) { unsigned long long old; @@ -105,7 +169,7 @@ static inline cycles_t get_cycles(void) return (cycles_t) get_tod_clock() >> 2; } -int get_sync_clock(unsigned long long 
*clock); +int get_phys_clock(unsigned long long *clock); void init_cpu_timer(void); unsigned long long monotonic_clock(void); diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 7a92e69c50bc..15711de10403 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -87,10 +87,10 @@ static inline void tlb_finish_mmu(struct mmu_gather *tlb, * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page * has already been freed, so just do free_page_and_swap_cache. */ -static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { free_page_and_swap_cache(page); - return 1; /* avoid calling tlb_flush_mmu */ + return false; /* avoid calling tlb_flush_mmu */ } static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) @@ -98,6 +98,24 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) free_page_and_swap_cache(page); } +static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return __tlb_remove_page(tlb, page); +} + +static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, + struct page *page) +{ + return __tlb_remove_page(tlb, page); +} + +static inline void tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return tlb_remove_page(tlb, page); +} + /* * pte_free_tlb frees a pte table and clears the CRSTE for the * page table from the tlb. diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index a2e6ef32e054..1a691ef740cf 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -5,6 +5,7 @@ #include #include #include +#include /* * Flush all TLB entries on the local CPU. @@ -44,17 +45,9 @@ void smp_ptlb_all(void); */ static inline void __tlb_flush_global(void) { - register unsigned long reg2 asm("2"); - register unsigned long reg3 asm("3"); - register unsigned long reg4 asm("4"); - long dummy; - - dummy = 0; - reg2 = reg3 = 0; - reg4 = ((unsigned long) &dummy) + 1; - asm volatile( - " csp %0,%2" - : : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" ); + unsigned int dummy = 0; + + csp(&dummy, 0, 0); } /* @@ -64,7 +57,7 @@ static inline void __tlb_flush_global(void) static inline void __tlb_flush_full(struct mm_struct *mm) { preempt_disable(); - atomic_add(0x10000, &mm->context.attach_count); + atomic_inc(&mm->context.flush_count); if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { /* Local TLB flush */ __tlb_flush_local(); @@ -76,21 +69,19 @@ static inline void __tlb_flush_full(struct mm_struct *mm) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); } - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); preempt_enable(); } /* - * Flush TLB entries for a specific ASCE on all CPUs. + * Flush TLB entries for a specific ASCE on all CPUs. Should never be used + * when more than one asce (e.g. gmap) ran on this mm. */ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) { - int active, count; - preempt_disable(); - active = (mm == current->active_mm) ? 
1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { __tlb_flush_idte_local(asce); } else { @@ -103,7 +94,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); } - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); preempt_enable(); } diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 6b53962e807e..f15f5571ca2b 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -14,10 +14,12 @@ struct cpu_topology_s390 { unsigned short core_id; unsigned short socket_id; unsigned short book_id; + unsigned short drawer_id; unsigned short node_id; cpumask_t thread_mask; cpumask_t core_mask; cpumask_t book_mask; + cpumask_t drawer_mask; }; DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology); @@ -30,6 +32,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology); #define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask) #define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id) #define topology_book_cpumask(cpu) (&per_cpu(cpu_topology, cpu).book_mask) +#define topology_drawer_id(cpu) (per_cpu(cpu_topology, cpu).drawer_id) +#define topology_drawer_cpumask(cpu) (&per_cpu(cpu_topology, cpu).drawer_mask) #define mc_capable() 1 diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index e0900ddf91dd..9b49cf1daa8f 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -151,8 +151,65 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from, __rc; \ }) -#define __put_user_fn(x, ptr, size) __put_get_user_asm(ptr, x, size, 0x810000UL) -#define __get_user_fn(x, ptr, size) __put_get_user_asm(x, ptr, size, 0x81UL) +static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) +{ + unsigned long spec = 0x810000UL; + int rc; + + switch (size) { + case 1: + rc = __put_get_user_asm((unsigned char __user *)ptr, + (unsigned char *)x, + size, spec); + break; + case 2: + rc = __put_get_user_asm((unsigned short __user *)ptr, + (unsigned short *)x, + size, spec); + break; + case 4: + rc = __put_get_user_asm((unsigned int __user *)ptr, + (unsigned int *)x, + size, spec); + break; + case 8: + rc = __put_get_user_asm((unsigned long __user *)ptr, + (unsigned long *)x, + size, spec); + break; + }; + return rc; +} + +static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) +{ + unsigned long spec = 0x81UL; + int rc; + + switch (size) { + case 1: + rc = __put_get_user_asm((unsigned char *)x, + (unsigned char __user *)ptr, + size, spec); + break; + case 2: + rc = __put_get_user_asm((unsigned short *)x, + (unsigned short __user *)ptr, + size, spec); + break; + case 4: + rc = __put_get_user_asm((unsigned int *)x, + (unsigned int __user *)ptr, + size, spec); + break; + case 8: + rc = __put_get_user_asm((unsigned long *)x, + (unsigned long __user *)ptr, + size, spec); + break; + }; + return rc; +} #else /* CONFIG_HAVE_MARCH_Z10_FEATURES */ @@ -191,7 +248,7 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s __put_user_bad(); \ break; \ } \ - __pu_err; \ + __builtin_expect(__pu_err, 0); \ }) #define put_user(x, ptr) \ @@ -240,7 +297,7 @@ int __put_user_bad(void) 
__attribute__((noreturn)); __get_user_bad(); \ break; \ } \ - __gu_err; \ + __builtin_expect(__gu_err, 0); \ }) #define get_user(x, ptr) \ diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index a150f4fabe43..77630c74f13b 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -359,9 +359,9 @@ typedef struct per_cr_bits bits; } control_regs; /* - * Use these flags instead of setting em_instruction_fetch - * directly they are used so that single stepping can be - * switched on & off while not affecting other tracing + * The single_step and instruction_fetch bits are obsolete, + * the kernel always sets them to zero. To enable single + * stepping use ptrace(PTRACE_SINGLESTEP) instead. */ unsigned single_step : 1; unsigned instruction_fetch : 1; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2f5586ab8a6a..f37be37edd3a 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -2,6 +2,9 @@ # Makefile for the linux kernel. # +KCOV_INSTRUMENT_early.o := n +KCOV_INSTRUMENT_sclp.o := n + ifdef CONFIG_FUNCTION_TRACER # Don't trace early setup code and tracing code CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) @@ -45,7 +48,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o -obj-y += runtime_instr.o cache.o dumpstack.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o obj-y += entry.o reipl.o relocate_kernel.o extra-y += head.o head64.o vmlinux.lds diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 77a84bd78be2..c8a83276a4dc 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -99,12 +99,7 @@ static inline enum cache_type get_cache_type(struct cache_info *ci, int level) static inline unsigned long ecag(int ai, int li, int ti) { - unsigned long cmd, val; - - cmd = ai << 4 | li << 1 | ti; - asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ - : "=d" (val) : "a" (cmd)); - return val; + return __ecag(ECAG_CACHE_ATTRIBUTE, ai << 4 | li << 1 | ti); } static void ci_leaf_init(struct cacheinfo *this_leaf, int private, diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 8cb9bfdd3ea8..43446fa2a4e5 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 69f9908ac44c..6693383bc01b 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -78,14 +78,10 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, sp = __dump_trace(func, data, sp, S390_lowcore.async_stack + frame_size - ASYNC_SIZE, S390_lowcore.async_stack + frame_size); - if (task) - __dump_trace(func, data, sp, - (unsigned long)task_stack_page(task), - (unsigned long)task_stack_page(task) + THREAD_SIZE); - else - __dump_trace(func, data, sp, - S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); + task = task ?: current; + __dump_trace(func, data, sp, + (unsigned long)task_stack_page(task), + (unsigned long)task_stack_page(task) + THREAD_SIZE); } EXPORT_SYMBOL_GPL(dump_trace); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index a0684de5a93b..717b03aa16b5 100644 --- a/arch/s390/kernel/early.c +++ 
b/arch/s390/kernel/early.c @@ -231,6 +231,26 @@ static noinline __init void detect_machine_type(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } +static noinline __init void setup_arch_string(void) +{ + struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; + + if (stsi(mach, 1, 1, 1)) + return; + EBCASC(mach->manufacturer, sizeof(mach->manufacturer)); + EBCASC(mach->type, sizeof(mach->type)); + EBCASC(mach->model, sizeof(mach->model)); + EBCASC(mach->model_capacity, sizeof(mach->model_capacity)); + dump_stack_set_arch_desc("%-16.16s %-4.4s %-16.16s %-16.16s (%s)", + mach->manufacturer, + mach->type, + mach->model, + mach->model_capacity, + MACHINE_IS_LPAR ? "LPAR" : + MACHINE_IS_VM ? "z/VM" : + MACHINE_IS_KVM ? "KVM" : "unknown"); +} + static __init void setup_topology(void) { int max_mnest; @@ -447,11 +467,13 @@ void __init startup_init(void) ipl_save_parameters(); rescue_initrd(); clear_bss_section(); + ptff_init(); init_kernel_storage_key(); lockdep_off(); setup_lowcore_early(); setup_facility_list(); detect_machine_type(); + setup_arch_string(); ipl_update_parameters(); setup_boot_command_line(); create_kernel_nss(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 2d47f9cfcb36..c51650a1ed16 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -163,6 +163,16 @@ _PIF_WORK = (_PIF_PER_TRAP) .endm .section .kprobes.text, "ax" +.Ldummy: + /* + * This nop exists only in order to avoid that __switch_to starts at + * the beginning of the kprobes text section. In that case we would + * have several symbols at the same address. E.g. objdump would take + * an arbitrary symbol name when disassembling this code. + * With the added nop in between the __switch_to symbol is unique + * again. + */ + nop 0 /* * Scheduler resume function, called by switch_to @@ -175,7 +185,6 @@ ENTRY(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task lgr %r1,%r2 aghi %r1,__TASK_thread # thread_struct of prev task - lg %r4,__TASK_thread_info(%r2) # get thread_info of prev lg %r5,__TASK_thread_info(%r3) # get thread_info of next stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev lgr %r1,%r3 diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c new file mode 100644 index 000000000000..81d1d1887507 --- /dev/null +++ b/arch/s390/kernel/fpu.c @@ -0,0 +1,249 @@ +/* + * In-kernel vector facility support functions + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ +#include +#include +#include +#include +#include + +/* + * Per-CPU variable to maintain FPU register ranges that are in use + * by the kernel. + */ +static DEFINE_PER_CPU(u32, kernel_fpu_state); + +#define KERNEL_FPU_STATE_MASK (KERNEL_FPU_MASK|KERNEL_FPC) + + +void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) +{ + if (!__this_cpu_read(kernel_fpu_state)) { + /* + * Save user space FPU state and register contents. Multiple + * calls because of interruptions do not matter and return + * immediately. This also sets CIF_FPU to lazy restore FP/VX + * register contents when returning to user space. 
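As a usage sketch of the interface introduced here, modelled on the s390_adjust_jiffies() conversion further down in this diff: a kernel user brackets its FP/VX code with kernel_fpu_begin()/kernel_fpu_end(), passing the register ranges it is about to clobber. The function below and the header placement are illustrative, not part of the patch:

	#include <asm/fpu/api.h>	/* struct kernel_fpu, KERNEL_* flags (assumed location) */

	static void example_fp_user(void)	/* illustrative only */
	{
		struct kernel_fpu fpu;

		/* Save user space / outer kernel FPU state, mark %f0-%f15 in use */
		kernel_fpu_begin(&fpu, KERNEL_FPR);
		/* ... code that clobbers the floating-point registers ... */
		kernel_fpu_end(&fpu);	/* restore whatever begin() saved */
	}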
+ */ + save_fpu_regs(); + } + + /* Update flags to use the vector facility for KERNEL_FPR */ + if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) { + flags |= KERNEL_VXR_LOW | KERNEL_FPC; + flags &= ~KERNEL_FPR; + } + + /* Save and update current kernel VX state */ + state->mask = __this_cpu_read(kernel_fpu_state); + __this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK); + + /* + * If this is the first call to __kernel_fpu_begin(), no additional + * work is required. + */ + if (!(state->mask & KERNEL_FPU_STATE_MASK)) + return; + + /* + * If KERNEL_FPR is still set, the vector facility is not available + * and, thus, save floating-point control and registers only. + */ + if (state->mask & KERNEL_FPR) { + asm volatile("stfpc %0" : "=Q" (state->fpc)); + asm volatile("std 0,%0" : "=Q" (state->fprs[0])); + asm volatile("std 1,%0" : "=Q" (state->fprs[1])); + asm volatile("std 2,%0" : "=Q" (state->fprs[2])); + asm volatile("std 3,%0" : "=Q" (state->fprs[3])); + asm volatile("std 4,%0" : "=Q" (state->fprs[4])); + asm volatile("std 5,%0" : "=Q" (state->fprs[5])); + asm volatile("std 6,%0" : "=Q" (state->fprs[6])); + asm volatile("std 7,%0" : "=Q" (state->fprs[7])); + asm volatile("std 8,%0" : "=Q" (state->fprs[8])); + asm volatile("std 9,%0" : "=Q" (state->fprs[9])); + asm volatile("std 10,%0" : "=Q" (state->fprs[10])); + asm volatile("std 11,%0" : "=Q" (state->fprs[11])); + asm volatile("std 12,%0" : "=Q" (state->fprs[12])); + asm volatile("std 13,%0" : "=Q" (state->fprs[13])); + asm volatile("std 14,%0" : "=Q" (state->fprs[14])); + asm volatile("std 15,%0" : "=Q" (state->fprs[15])); + return; + } + + /* + * If this is a nested call to __kernel_fpu_begin(), check the saved + * state mask to save and later restore the vector registers that + * are already in use. Let's start with checking floating-point + * controls. + */ + if (state->mask & KERNEL_FPC) + asm volatile("stfpc %0" : "=m" (state->fpc)); + + /* Test and save vector registers */ + asm volatile ( + /* + * Test if any vector register must be saved and, if so, + * test if all registers can be saved. + */ + " tmll %[m],15\n" /* KERNEL_VXR_MASK */ + " jz 20f\n" /* no work -> done */ + " la 1,%[vxrs]\n" /* load save area */ + " jo 18f\n" /* -> save V0..V31 */ + + /* + * Test if V8..V23 can be saved at once... this speeds up + * for KERNEL_VXR_MID only. Otherwise continue to split the + * range of vector registers into two halves and test them + * separately. + */ + " tmll %[m],6\n" /* KERNEL_VXR_MID */ + " jo 17f\n" /* -> save V8..V23 */ + + /* Test and save the first half of 16 vector registers */ + "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ + " jz 10f\n" /* -> KERNEL_VXR_HIGH */ + " jo 2f\n" /* 11 -> save V0..V15 */ + " brc 4,3f\n" /* 01 -> save V0..V7 */ + " brc 2,4f\n" /* 10 -> save V8..V15 */ + + /* Test and save the second half of 16 vector registers */ + "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ + " jo 19f\n" /* 11 -> save V16..V31 */ + " brc 4,11f\n" /* 01 -> save V16..V23 */ + " brc 2,12f\n" /* 10 -> save V24..V31 */ + " j 20f\n" /* 00 -> done */ + + /* + * Below are the vstm combinations to save multiple vector + * registers at once.
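The tmll immediates above encode the mask layout: each of the four low bits covers a block of eight vector registers (bit value 1 = V0-V7, 2 = V8-V15, 4 = V16-V23, 8 = V24-V31), so 3 selects the low half, 6 the middle range V8-V23 and 12 the high half. A C rendering of the same dispatch, with invented helper names (the authoritative KERNEL_VXR_* constants live in the FPU headers added by this series, and the asm additionally merges adjacent blocks into a single vstm):

	/* save(first, last) stands in for one "vstm first,last" instruction */
	static void save_vx_blocks(unsigned int mask,
				   void (*save)(int first, int last))
	{
		if (!(mask & 15))		/* jz 20f: nothing to save */
			return;
		if ((mask & 15) == 15) {	/* jo 18f: one vstm 0,31 */
			save(0, 31);
			return;
		}
		if ((mask & 6) == 6) {		/* jo 17f: vstm 8,23 */
			save(8, 23);
			mask &= ~6U;		/* nill %[m],249 */
		}
		if (mask & 1)
			save(0, 7);
		if (mask & 2)
			save(8, 15);
		if (mask & 4)
			save(16, 23);
		if (mask & 8)
			save(24, 31);
	}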
+ */ + "2: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "3: .word 0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "4: .word 0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "\n" + "11: .word 0xe707,0x1100,0x0c3e\n" /* vstm 16,23,256(1) */ + " j 20f\n" /* -> done */ + "12: .word 0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */ + " j 20f\n" /* -> done */ + "\n" + "17: .word 0xe787,0x1080,0x043e\n" /* vstm 8,23,128(1) */ + " nill %[m],249\n" /* m &= ~VXR_MID */ + " j 1b\n" /* -> VXR_LOW */ + "\n" + "18: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + "19: .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ + "20:" + : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs) + : [m] "d" (state->mask) + : "1", "cc"); +} +EXPORT_SYMBOL(__kernel_fpu_begin); + +void __kernel_fpu_end(struct kernel_fpu *state) +{ + /* Just update the per-CPU state if there is nothing to restore */ + if (!(state->mask & KERNEL_FPU_STATE_MASK)) + goto update_fpu_state; + + /* + * If KERNEL_FPR is specified, the vector facility is not available + * and, thus, restore floating-point control and registers only. + */ + if (state->mask & KERNEL_FPR) { + asm volatile("lfpc %0" : : "Q" (state->fpc)); + asm volatile("ld 0,%0" : : "Q" (state->fprs[0])); + asm volatile("ld 1,%0" : : "Q" (state->fprs[1])); + asm volatile("ld 2,%0" : : "Q" (state->fprs[2])); + asm volatile("ld 3,%0" : : "Q" (state->fprs[3])); + asm volatile("ld 4,%0" : : "Q" (state->fprs[4])); + asm volatile("ld 5,%0" : : "Q" (state->fprs[5])); + asm volatile("ld 6,%0" : : "Q" (state->fprs[6])); + asm volatile("ld 7,%0" : : "Q" (state->fprs[7])); + asm volatile("ld 8,%0" : : "Q" (state->fprs[8])); + asm volatile("ld 9,%0" : : "Q" (state->fprs[9])); + asm volatile("ld 10,%0" : : "Q" (state->fprs[10])); + asm volatile("ld 11,%0" : : "Q" (state->fprs[11])); + asm volatile("ld 12,%0" : : "Q" (state->fprs[12])); + asm volatile("ld 13,%0" : : "Q" (state->fprs[13])); + asm volatile("ld 14,%0" : : "Q" (state->fprs[14])); + asm volatile("ld 15,%0" : : "Q" (state->fprs[15])); + goto update_fpu_state; + } + + /* Test and restore floating-point controls */ + if (state->mask & KERNEL_FPC) + asm volatile("lfpc %0" : : "Q" (state->fpc)); + + /* Test and restore (load) vector registers */ + asm volatile ( + /* + * Test if any vector registers must be loaded and, if so, + * test if all registers can be loaded at once. + */ + " tmll %[m],15\n" /* KERNEL_VXR_MASK */ + " jz 20f\n" /* no work -> done */ + " la 1,%[vxrs]\n" /* load load area */ + " jo 18f\n" /* -> load V0..V31 */ + + /* + * Test if V8..V23 can be restored at once... this speeds up + * for KERNEL_VXR_MID only. Otherwise continue to split the + * range of vector registers into two halves and test them + * separately. + */ + " tmll %[m],6\n" /* KERNEL_VXR_MID */ + " jo 17f\n" /* -> load V8..V23 */ + + /* Test and load the first half of 16 vector registers */ + "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ + " jz 10f\n" /* -> KERNEL_VXR_HIGH */ + " jo 2f\n" /* 11 -> load V0..V15 */ + " brc 4,3f\n" /* 01 -> load V0..V7 */ + " brc 2,4f\n" /* 10 -> load V8..V15 */ + + /* Test and load the second half of 16 vector registers */ + "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ + " jo 19f\n" /* 11 -> load V16..V31 */ + " brc 4,11f\n" /* 01 -> load V16..V23 */ + " brc 2,12f\n" /* 10 -> load V24..V31 */ + " j 20f\n" /* 00 -> done */ + + /* + * Below are the vlm combinations to load multiple vector + * registers at once.
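Because __kernel_fpu_begin() stashes the previous per-CPU mask in state->mask and __kernel_fpu_end() writes it back, begin/end pairs nest: an interrupt handler can open its own FPU region on top of an interrupted one, and only the register ranges the interrupted context had marked in use are saved and later restored. A sketch of the nesting pattern (caller names illustrative):

	static void vector_work(void)
	{
		struct kernel_fpu s;

		kernel_fpu_begin(&s, KERNEL_VXR_LOW);	/* will clobber V0..V15 */
		/* ... may be interrupted here ... */
		kernel_fpu_end(&s);
	}

	/* Runs in interrupt context, possibly on top of vector_work(). */
	static void irq_vector_work(void)
	{
		struct kernel_fpu s;	/* one kernel_fpu per nesting level */

		kernel_fpu_begin(&s, KERNEL_VXR_HIGH);	/* will clobber V16..V31 */
		/* ... */
		kernel_fpu_end(&s);
	}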
+ */ + "2: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "3: .word 0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "4: .word 0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "\n" + "11: .word 0xe707,0x1100,0x0c36\n" /* vlm 16,23,256(1) */ + " j 20f\n" /* -> done */ + "12: .word 0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */ + " j 20f\n" /* -> done */ + "\n" + "17: .word 0xe787,0x1080,0x0436\n" /* vlm 8,23,128(1) */ + " nill %[m],249\n" /* m &= ~VXR_MID */ + " j 1b\n" /* -> VXR_LOW */ + "\n" + "18: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + "19: .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ + "20:" + : + : [vxrs] "Q" (*(struct vx_array *) &state->vxrs), + [m] "d" (state->mask) + : "1", "cc"); + +update_fpu_state: + /* Update current kernel VX state */ + __this_cpu_write(kernel_fpu_state, state->mask); +} +EXPORT_SYMBOL(__kernel_fpu_end); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index f20abdb5630a..295bfb7124bc 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -121,9 +121,9 @@ static char *dump_type_str(enum dump_type type) * Must be in data section since the bss section * is not cleared when these are accessed. */ -static u8 ipl_ssid __attribute__((__section__(".data"))) = 0; -static u16 ipl_devno __attribute__((__section__(".data"))) = 0; -u32 ipl_flags __attribute__((__section__(".data"))) = 0; +static u8 ipl_ssid __section(.data) = 0; +static u16 ipl_devno __section(.data) = 0; +u32 ipl_flags __section(.data) = 0; enum ipl_method { REIPL_METHOD_CCW_CIO, @@ -174,7 +174,7 @@ static inline int __diag308(unsigned long subcode, void *addr) asm volatile( " diag %0,%2,0x308\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : "+d" (_addr), "+d" (_rc) : "d" (subcode) : "cc", "memory"); @@ -563,7 +563,7 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); else if (ipl_info.type == IPL_TYPE_CCW) @@ -1085,21 +1085,24 @@ static void __reipl_run(void *unused) break; case REIPL_METHOD_CCW_DIAG: diag308(DIAG308_SET, reipl_block_ccw); - diag308(DIAG308_IPL, NULL); + if (MACHINE_IS_LPAR) + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); + else + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RW_DIAG: diag308(DIAG308_SET, reipl_block_fcp); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RO_DIAG: - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RO_VM: __cpcmd("IPL", NULL, 0, NULL); break; case REIPL_METHOD_NSS_DIAG: diag308(DIAG308_SET, reipl_block_nss); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_NSS: get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS); @@ -1108,7 +1111,7 @@ static void __reipl_run(void *unused) case REIPL_METHOD_DEFAULT: if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_DUMP: break; @@ -1423,7 +1426,7 @@ static void diag308_dump(void *dump_block) { diag308(DIAG308_SET, dump_block); while (1) { - if (diag308(DIAG308_DUMP, NULL) != 0x302) + if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302) break; udelay_simple(USEC_PER_SEC); } @@ -2064,12 +2067,5 @@ void s390_reset_system(void) S390_lowcore.program_new_psw.addr = (unsigned long) s390_base_pgm_handler; - /* - * 
Clear subchannel ID and number to signal new kernel that no CCW or - * SCSI IPL has been done (for kexec and kdump) - */ - S390_lowcore.subchannel_id = 0; - S390_lowcore.subchannel_nr = 0; - do_reset_calls(); } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index c373a1d41d10..285d6561076d 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -127,9 +127,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); } - if (index < NR_IRQS) { - if (index >= NR_IRQS_BASE) - goto out; + if (index < NR_IRQS_BASE) { seq_printf(p, "%s: ", irqclass_main_desc[index].name); irq = irqclass_main_desc[index].irq; for_each_online_cpu(cpu) @@ -137,6 +135,9 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); goto out; } + if (index > NR_IRQS_BASE) + goto out; + for (index = 0; index < NR_ARCH_IRQS; index++) { seq_printf(p, "%s: ", irqclass_sub_desc[index].name); irq = irqclass_sub_desc[index].irq; diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 0e64f08d3d69..3074c1d83829 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -60,8 +61,6 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, static int __init machine_kdump_pm_init(void) { pm_notifier(machine_kdump_pm_cb, 0); - /* Create initial mapping for crashkernel memory */ - arch_kexec_unprotect_crashkres(); return 0; } arch_initcall(machine_kdump_pm_init); @@ -150,42 +149,40 @@ static int kdump_csum_valid(struct kimage *image) #ifdef CONFIG_CRASH_DUMP -/* - * Map or unmap crashkernel memory - */ -static void crash_map_pages(int enable) +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) { - unsigned long size = resource_size(&crashk_res); - - BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || - size % KEXEC_CRASH_MEM_ALIGN); - if (enable) - vmem_add_mapping(crashk_res.start, size); - else { - vmem_remove_mapping(crashk_res.start, size); - if (size) - os_info_crashkernel_add(crashk_res.start, size); - else - os_info_crashkernel_add(0, 0); - } + unsigned long addr, size; + + for (addr = begin; addr < end; addr += PAGE_SIZE) + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); + size = begin - crashk_res.start; + if (size) + os_info_crashkernel_add(crashk_res.start, size); + else + os_info_crashkernel_add(0, 0); +} + +static void crash_protect_pages(int protect) +{ + unsigned long size; + + if (!crashk_res.end) + return; + size = resource_size(&crashk_res); + if (protect) + set_memory_ro(crashk_res.start, size >> PAGE_SHIFT); + else + set_memory_rw(crashk_res.start, size >> PAGE_SHIFT); } -/* - * Unmap crashkernel memory - */ void arch_kexec_protect_crashkres(void) { - if (crashk_res.end) - crash_map_pages(0); + crash_protect_pages(1); } -/* - * Map crashkernel memory - */ void arch_kexec_unprotect_crashkres(void) { - if (crashk_res.end) - crash_map_pages(1); + crash_protect_pages(0); } #endif diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 07302ce37648..29376f0e725c 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -27,7 +27,6 @@ struct mcck_struct { unsigned int kill_task : 1; unsigned int channel_report : 1; unsigned int warning : 1; - unsigned int etr_queue : 1; unsigned int stp_queue : 1; unsigned long mcck_code; }; @@ -82,8 +81,6 @@ void 
s390_handle_mcck(void) if (xchg(&mchchk_wng_posted, 1) == 0) kill_cad_pid(SIGPWR, 1); } - if (mcck.etr_queue) - etr_queue_work(); if (mcck.stp_queue) stp_queue_work(); if (mcck.kill_task) { @@ -241,8 +238,6 @@ static int notrace s390_validate_registers(union mci mci) #define ED_STP_ISLAND 6 /* External damage STP island check */ #define ED_STP_SYNC 7 /* External damage STP sync check */ -#define ED_ETR_SYNC 12 /* External damage ETR sync check */ -#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */ /* * machine check handler. @@ -325,15 +320,11 @@ void notrace s390_do_machine_check(struct pt_regs *regs) } if (mci.ed && mci.ec) { /* External damage */ - if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC)) - mcck->etr_queue |= etr_sync_check(); - if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH)) - mcck->etr_queue |= etr_switch_to_local(); if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) mcck->stp_queue |= stp_sync_check(); if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) mcck->stp_queue |= stp_island_check(); - if (mcck->etr_queue || mcck->stp_queue) + if (mcck->stp_queue) set_cpu_flag(CIF_MCCK_PENDING); } if (mci.se) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 59215c518f37..7ec63b1d920d 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -649,6 +649,8 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) /* Performance monitoring unit for s390x */ static struct pmu cpumf_pmu = { + .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, .pmu_enable = cpumf_pmu_enable, .pmu_disable = cpumf_pmu_disable, .event_init = cpumf_pmu_event_init, @@ -708,12 +710,6 @@ static int __init cpumf_pmu_init(void) goto out; } - /* The CPU measurement counter facility does not have overflow - * interrupts to do sampling. Sampling must be provided by - * external means, for example, by timers. 
- */ - cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - cpumf_pmu.attr_groups = cpumf_cf_event_group(); rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); if (rc) { diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index a8e832166417..53acf2d76fa9 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -601,17 +601,12 @@ static void release_pmc_hardware(void) irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); on_each_cpu(setup_pmc_cpu, &flags, 1); - perf_release_sampling(); } static int reserve_pmc_hardware(void) { int flags = PMC_INIT; - int err; - err = perf_reserve_sampling(); - if (err) - return err; on_each_cpu(setup_pmc_cpu, &flags, 1); if (flags & PMC_FAILURE) { release_pmc_hardware(); @@ -979,12 +974,15 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) struct pt_regs regs; struct perf_sf_sde_regs *sde_regs; struct perf_sample_data data; - struct perf_raw_record raw; + struct perf_raw_record raw = { + .frag = { + .size = sfr->size, + .data = sfr, + }, + }; /* Setup perf sample */ perf_sample_data_init(&data, 0, event->hw.last_period); - raw.size = sfr->size; - raw.data = sfr; data.raw = &raw; /* Setup pt_regs to look like an CPU-measurement external interrupt diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 87035fa58bbe..17431f63de00 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -248,33 +248,3 @@ ssize_t cpumf_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%04llx,name=%s\n", pmu_attr->id, attr->attr.name); } - -/* Reserve/release functions for sharing perf hardware */ -static DEFINE_SPINLOCK(perf_hw_owner_lock); -static void *perf_sampling_owner; - -int perf_reserve_sampling(void) -{ - int err; - - err = 0; - spin_lock(&perf_hw_owner_lock); - if (perf_sampling_owner) { - pr_warn("The sampling facility is already reserved by %p\n", - perf_sampling_owner); - err = -EBUSY; - } else - perf_sampling_owner = __builtin_return_address(0); - spin_unlock(&perf_hw_owner_lock); - return err; -} -EXPORT_SYMBOL(perf_reserve_sampling); - -void perf_release_sampling(void) -{ - spin_lock(&perf_hw_owner_lock); - WARN_ON(!perf_sampling_owner); - perf_sampling_owner = NULL; - spin_unlock(&perf_hw_owner_lock); -} -EXPORT_SYMBOL(perf_release_sampling); diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index de7451065c34..81d0808085e6 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -13,12 +13,45 @@ #include #include #include +#include #include #include #include #include -static DEFINE_PER_CPU(struct cpuid, cpu_id); +struct cpu_info { + unsigned int cpu_mhz_dynamic; + unsigned int cpu_mhz_static; + struct cpuid cpu_id; +}; + +static DEFINE_PER_CPU(struct cpu_info, cpu_info); + +static bool machine_has_cpu_mhz; + +void __init cpu_detect_mhz_feature(void) +{ + if (test_facility(34) && __ecag(ECAG_CPU_ATTRIBUTE, 0) != -1UL) + machine_has_cpu_mhz = 1; +} + +static void update_cpu_mhz(void *arg) +{ + unsigned long mhz; + struct cpu_info *c; + + mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + c = this_cpu_ptr(&cpu_info); + c->cpu_mhz_dynamic = mhz >> 32; + c->cpu_mhz_static = mhz & 0xffffffff; +} + +void s390_update_cpu_mhz(void) +{ + s390_adjust_jiffies(); + if (machine_has_cpu_mhz) + on_each_cpu(update_cpu_mhz, NULL, 0); +} void notrace cpu_relax(void) { @@ -35,9 +68,11 @@ EXPORT_SYMBOL(cpu_relax); */ void cpu_init(void) { - struct cpuid *id = 
this_cpu_ptr(&cpu_id); + struct cpuid *id = this_cpu_ptr(&cpu_info.cpu_id); get_cpu_id(id); + if (machine_has_cpu_mhz) + update_cpu_mhz(NULL); atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; BUG_ON(current->mm); @@ -53,10 +88,7 @@ int cpu_have_feature(unsigned int num) } EXPORT_SYMBOL(cpu_have_feature); -/* - * show_cpuinfo - Get information on one CPU for use by procfs. - */ -static int show_cpuinfo(struct seq_file *m, void *v) +static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", @@ -65,34 +97,55 @@ static int show_cpuinfo(struct seq_file *m, void *v) static const char * const int_hwcap_str[] = { "sie" }; - unsigned long n = (unsigned long) v - 1; - int i; - - if (!n) { - s390_adjust_jiffies(); - seq_printf(m, "vendor_id : IBM/S390\n" - "# processors : %i\n" - "bogomips per cpu: %lu.%02lu\n", - num_online_cpus(), loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ))%100); - seq_puts(m, "features\t: "); - for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) - if (hwcap_str[i] && (elf_hwcap & (1UL << i))) - seq_printf(m, "%s ", hwcap_str[i]); - for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) - if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) - seq_printf(m, "%s ", int_hwcap_str[i]); - seq_puts(m, "\n"); - show_cacheinfo(m); - } - if (cpu_online(n)) { - struct cpuid *id = &per_cpu(cpu_id, n); - seq_printf(m, "processor %li: " + int i, cpu; + + seq_printf(m, "vendor_id : IBM/S390\n" + "# processors : %i\n" + "bogomips per cpu: %lu.%02lu\n", + num_online_cpus(), loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ))%100); + seq_printf(m, "max thread id : %d\n", smp_cpu_mtid); + seq_puts(m, "features\t: "); + for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) + if (hwcap_str[i] && (elf_hwcap & (1UL << i))) + seq_printf(m, "%s ", hwcap_str[i]); + for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) + if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) + seq_printf(m, "%s ", int_hwcap_str[i]); + seq_puts(m, "\n"); + show_cacheinfo(m); + for_each_online_cpu(cpu) { + struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu); + + seq_printf(m, "processor %d: " "version = %02X, " "identification = %06X, " "machine = %04X\n", - n, id->version, id->ident, id->machine); + cpu, id->version, id->ident, id->machine); } +} + +static void show_cpu_mhz(struct seq_file *m, unsigned long n) +{ + struct cpu_info *c = per_cpu_ptr(&cpu_info, n); + + seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic); + seq_printf(m, "cpu MHz static : %d\n", c->cpu_mhz_static); +} + +/* + * show_cpuinfo - Get information on one CPU for use by procfs. 
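Both frequencies arrive in one 64-bit ECAG result, split as update_cpu_mhz() above shows; the same __ecag() helper also serves the cache-topology query in cache.c earlier in this diff. Schematically (reading the static value as the rated frequency and the dynamic one as the currently effective frequency is an interpretation, not patch text):

	static void example_read_mhz(void)	/* illustrative only */
	{
		unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
		unsigned int dynamic_mhz = mhz >> 32;		/* high word */
		unsigned int static_mhz = mhz & 0xffffffff;	/* low word */

		pr_info("cpu MHz: dynamic %u static %u\n",
			dynamic_mhz, static_mhz);
	}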
+ */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + unsigned long n = (unsigned long) v - 1; + + if (!n) + show_cpu_summary(m, v); + if (!machine_has_cpu_mhz) + return 0; + seq_printf(m, "\ncpu number : %ld\n", n); + show_cpu_mhz(m, n); return 0; } @@ -126,4 +179,3 @@ const struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; - diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f31939147ccd..ba5f456edaa9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -130,17 +130,14 @@ __setup("condev=", condev_setup); static void __init set_preferred_console(void) { - if (MACHINE_IS_KVM) { - if (sclp.has_vt220) - add_preferred_console("ttyS", 1, NULL); - else if (sclp.has_linemode) - add_preferred_console("ttyS", 0, NULL); - else - add_preferred_console("hvc", 0, NULL); - } else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) add_preferred_console("ttyS", 0, NULL); else if (CONSOLE_IS_3270) add_preferred_console("tty3270", 0, NULL); + else if (CONSOLE_IS_VT220) + add_preferred_console("ttyS", 1, NULL); + else if (CONSOLE_IS_HVC) + add_preferred_console("hvc", 0, NULL); } static int __init conmode_setup(char *str) @@ -206,6 +203,15 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } + } else if (MACHINE_IS_KVM) { + if (sclp.has_vt220 && + config_enabled(CONFIG_SCLP_VT220_CONSOLE)) + SET_CONSOLE_VT220; + else if (sclp.has_linemode && + config_enabled(CONFIG_SCLP_CONSOLE)) + SET_CONSOLE_SCLP; + else + SET_CONSOLE_HVC; } else { #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) SET_CONSOLE_SCLP; @@ -289,7 +295,7 @@ static int __init parse_vmalloc(char *arg) } early_param("vmalloc", parse_vmalloc); -void *restart_stack __attribute__((__section__(".data"))); +void *restart_stack __section(.data); static void __init setup_lowcore(void) { @@ -432,6 +438,20 @@ static void __init setup_resources(void) } } } +#ifdef CONFIG_CRASH_DUMP + /* + * Re-add removed crash kernel memory as reserved memory. This makes + * sure it will be mapped with the identity mapping and struct pages + * will be created, so it can be resized later on. + * However add it later since the crash kernel resource should not be + * part of the System RAM resource. 
+ */ + if (crashk_res.end) { + memblock_add(crashk_res.start, resource_size(&crashk_res)); + memblock_reserve(crashk_res.start, resource_size(&crashk_res)); + insert_resource(&iomem_resource, &crashk_res); + } +#endif } static void __init setup_memory_end(void) @@ -602,7 +622,6 @@ static void __init reserve_crashkernel(void) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; - insert_resource(&iomem_resource, &crashk_res); memblock_remove(crash_base, crash_size); pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", @@ -901,6 +920,7 @@ void __init setup_arch(char **cmdline_p) setup_vmcoreinfo(); setup_lowcore(); smp_fill_possible_mask(); + cpu_detect_mhz_feature(); cpu_init(); numa_setup(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 7b89a7572100..35531fe1c5ea 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -242,10 +242,8 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { struct lowcore *lc = pcpu->lowcore; - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); - atomic_inc(&init_mm.context.attach_count); lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->percpu_offset = __per_cpu_offset[cpu]; @@ -320,17 +318,11 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), */ static int pcpu_set_smt(unsigned int mtid) { - register unsigned long reg1 asm ("1") = (unsigned long) mtid; int cc; if (smp_cpu_mtid == mtid) return 0; - asm volatile( - " sigp %1,0,%2 # sigp set multi-threading\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING) - : "cc"); + cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL); if (cc == 0) { smp_cpu_mtid = mtid; smp_cpu_mt_shift = 0; @@ -876,10 +868,8 @@ void __cpu_die(unsigned int cpu) while (!pcpu_stopped(pcpu)) cpu_relax(); pcpu_free_lowcore(pcpu); - atomic_dec(&init_mm.context.attach_count); cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); - if (MACHINE_HAS_TLB_LC) - cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); } void __noreturn cpu_die(void) @@ -897,7 +887,7 @@ void __init smp_fill_possible_mask(void) sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1; sclp_max = min(smp_max_threads, sclp_max); - sclp_max = sclp.max_cores * sclp_max ?: nr_cpu_ids; + sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids; possible = setup_possible_cpus ?: nr_cpu_ids; possible = min(possible, sclp_max); for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index f7dba3887a54..050b8d067d3b 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -16,21 +16,11 @@ #include #include #include - -/* Sigh, math-emu. Don't ask. */ -#include -#include -#include +#include int topology_max_mnest; -/* - * stsi - store system information - * - * Returns the current configuration level if function code 0 was specified. - * Otherwise returns 0 on success or a negative value on error. 
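Usage is unchanged by the refactoring: per the comment above, function code 0 stores nothing and only reports the configuration level, while higher function codes fill the given sysinfo block, as setup_arch_string() earlier in this diff does. A sketch of a caller (the struct layout comes from asm/sysinfo.h; passing NULL with fc 0 is assumed safe here because nothing is stored for that function code):

	static void example_stsi_user(void)	/* illustrative only */
	{
		struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
		int level;

		level = stsi(NULL, 0, 0, 0);	/* fc 0: configuration level only */
		if (level >= 1 && stsi(mach, 1, 1, 1) == 0)
			/* block is valid; text fields arrive in EBCDIC */
			EBCASC(mach->manufacturer, sizeof(mach->manufacturer));
	}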
- */ -int stsi(void *sysinfo, int fc, int sel1, int sel2) +static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl) { register int r0 asm("0") = (fc << 28) | sel1; register int r1 asm("1") = sel2; @@ -45,9 +35,24 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2) : "+d" (r0), "+d" (rc) : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP) : "cc", "memory"); + *lvl = ((unsigned int) r0) >> 28; + return rc; +} + +/* + * stsi - store system information + * + * Returns the current configuration level if function code 0 was specified. + * Otherwise returns 0 on success or a negative value on error. + */ +int stsi(void *sysinfo, int fc, int sel1, int sel2) +{ + int lvl, rc; + + rc = __stsi(sysinfo, fc, sel1, sel2, &lvl); if (rc) return rc; - return fc ? 0 : ((unsigned int) r0) >> 28; + return fc ? 0 : lvl; } EXPORT_SYMBOL(stsi); @@ -414,10 +419,8 @@ subsys_initcall(create_proc_service_level); void s390_adjust_jiffies(void) { struct sysinfo_1_2_2 *info; - const unsigned int fmil = 0x4b189680; /* 1e7 as 32-bit float. */ - FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); - FP_DECL_EX; - unsigned int capability; + unsigned long capability; + struct kernel_fpu fpu; info = (void *) get_zeroed_page(GFP_KERNEL); if (!info) @@ -433,15 +436,25 @@ void s390_adjust_jiffies(void) * higher cpu capacity. Bogomips are the other way round. * To get to a halfway suitable number we divide 1e7 * by the cpu capability number. Yes, that means a floating - * point division .. math-emu here we come :-) + * point division .. */ - FP_UNPACK_SP(SA, &fmil); - if ((info->capability >> 23) == 0) - FP_FROM_INT_S(SB, (long) info->capability, 64, long); - else - FP_UNPACK_SP(SB, &info->capability); - FP_DIV_S(SR, SA, SB); - FP_TO_INT_S(capability, SR, 32, 0); + kernel_fpu_begin(&fpu, KERNEL_FPR); + asm volatile( + " sfpc %3\n" + " l %0,%1\n" + " tmlh %0,0xff80\n" + " jnz 0f\n" + " cefbr %%f2,%0\n" + " j 1f\n" + "0: le %%f2,%1\n" + "1: cefbr %%f0,%2\n" + " debr %%f0,%%f2\n" + " cgebr %0,5,%%f0\n" + : "=&d" (capability) + : "Q" (info->capability), "d" (10000000), "d" (0) + : "cc" + ); + kernel_fpu_end(&fpu); } else /* * Really old machine without stsi block for basic diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 9409d32f285e..4e9949800562 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -39,13 +39,14 @@ #include #include #include +#include #include #include #include #include #include #include -#include +#include #include #include "entry.h" @@ -61,6 +62,32 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators); ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier); EXPORT_SYMBOL(s390_epoch_delta_notifier); +unsigned char ptff_function_mask[16]; +unsigned long lpar_offset; +unsigned long initial_leap_seconds; + +/* + * Get time offsets with PTFF + */ +void __init ptff_init(void) +{ + struct ptff_qto qto; + struct ptff_qui qui; + + if (!test_facility(28)) + return; + ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF); + + /* get LPAR offset */ + if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) + lpar_offset = qto.tod_epoch_difference; + + /* get initial leap seconds */ + if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0) + initial_leap_seconds = (unsigned long) + ((long) qui.old_leap * 4096000000L); +} + /* * Scheduler clock - returns current time in nanosec units. 
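A note on the 4096000000L factor in ptff_init() above: bit 51 of the TOD clock ticks once per microsecond, so one second corresponds to 10^6 us x 2^12 units/us = 4,096,000,000 TOD units. The multiplication therefore converts the leap-second count from the PTFF_QUI block into a TOD-format offset that read_persistent_clock64() below can subtract directly:

	/* seconds -> TOD clock units: 2^12 units per microsecond */
	#define TOD_UNITS_PER_SEC	(1000000UL << 12)	/* = 4096000000 */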
*/ @@ -162,30 +189,32 @@ static void clock_comparator_interrupt(struct ext_code ext_code, set_clock_comparator(S390_lowcore.clock_comparator); } -static void etr_timing_alert(struct etr_irq_parm *); static void stp_timing_alert(struct stp_irq_parm *); static void timing_alert_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { inc_irq_stat(IRQEXT_TLA); - if (param32 & 0x00c40000) - etr_timing_alert((struct etr_irq_parm *) ¶m32); if (param32 & 0x00038000) stp_timing_alert((struct stp_irq_parm *) ¶m32); } -static void etr_reset(void); static void stp_reset(void); void read_persistent_clock64(struct timespec64 *ts) { - tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts); + __u64 clock; + + clock = get_tod_clock() - initial_leap_seconds; + tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); } void read_boot_clock64(struct timespec64 *ts) { - tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); + __u64 clock; + + clock = sched_clock_base_cc - initial_leap_seconds; + tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); } static cycle_t read_tod_clock(struct clocksource *cs) @@ -269,7 +298,6 @@ void update_vsyscall_tz(void) void __init time_init(void) { /* Reset time synchronization interfaces. */ - etr_reset(); stp_reset(); /* request the clock comparator external interrupt */ @@ -337,20 +365,20 @@ static unsigned long clock_sync_flags; #define CLOCK_SYNC_STP 3 /* - * The synchronous get_clock function. It will write the current clock - * value to the clock pointer and return 0 if the clock is in sync with - * the external time source. If the clock mode is local it will return - * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external - * reference. + * The get_clock function for the physical clock. It will get the current + * TOD clock, subtract the LPAR offset and write the result to *clock. + * The function returns 0 if the clock is in sync with the external time + * source. If the clock mode is local it will return -EOPNOTSUPP and + * -EAGAIN if the clock is not in sync with the external reference. */ -int get_sync_clock(unsigned long long *clock) +int get_phys_clock(unsigned long long *clock) { atomic_t *sw_ptr; unsigned int sw0, sw1; sw_ptr = &get_cpu_var(clock_sync_word); sw0 = atomic_read(sw_ptr); - *clock = get_tod_clock(); + *clock = get_tod_clock() - lpar_offset; sw1 = atomic_read(sw_ptr); put_cpu_var(clock_sync_word); if (sw0 == sw1 && (sw0 & 0x80000000U)) @@ -364,7 +392,7 @@ int get_sync_clock(unsigned long long *clock) return -EACCES; return -EAGAIN; } -EXPORT_SYMBOL(get_sync_clock); +EXPORT_SYMBOL(get_phys_clock); /* * Make get_sync_clock return -EAGAIN. @@ -416,301 +444,6 @@ static void __init time_init_wq(void) time_sync_wq = create_singlethread_workqueue("timesync"); } -/* - * External Time Reference (ETR) code. 
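A sketch of how a caller consumes the get_phys_clock() contract spelled out above (the caller shape is illustrative; the return codes are those documented in the comment and function body):

	static void example_clock_user(void)	/* illustrative only */
	{
		unsigned long long clock;
		int rc;

		rc = get_phys_clock(&clock);
		if (rc == 0) {
			/* in sync: clock holds the physical TOD, LPAR offset removed */
		} else if (rc == -EOPNOTSUPP) {
			/* local clock mode: no external time reference configured */
		} else {
			/* -EAGAIN or -EACCES: value not usable now, retry later */
		}
	}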
- */ -static int etr_port0_online; -static int etr_port1_online; -static int etr_steai_available; - -static int __init early_parse_etr(char *p) -{ - if (strncmp(p, "off", 3) == 0) - etr_port0_online = etr_port1_online = 0; - else if (strncmp(p, "port0", 5) == 0) - etr_port0_online = 1; - else if (strncmp(p, "port1", 5) == 0) - etr_port1_online = 1; - else if (strncmp(p, "on", 2) == 0) - etr_port0_online = etr_port1_online = 1; - return 0; -} -early_param("etr", early_parse_etr); - -enum etr_event { - ETR_EVENT_PORT0_CHANGE, - ETR_EVENT_PORT1_CHANGE, - ETR_EVENT_PORT_ALERT, - ETR_EVENT_SYNC_CHECK, - ETR_EVENT_SWITCH_LOCAL, - ETR_EVENT_UPDATE, -}; - -/* - * Valid bit combinations of the eacr register are (x = don't care): - * e0 e1 dp p0 p1 ea es sl - * 0 0 x 0 0 0 0 0 initial, disabled state - * 0 0 x 0 1 1 0 0 port 1 online - * 0 0 x 1 0 1 0 0 port 0 online - * 0 0 x 1 1 1 0 0 both ports online - * 0 1 x 0 1 1 0 0 port 1 online and usable, ETR or PPS mode - * 0 1 x 0 1 1 0 1 port 1 online, usable and ETR mode - * 0 1 x 0 1 1 1 0 port 1 online, usable, PPS mode, in-sync - * 0 1 x 0 1 1 1 1 port 1 online, usable, ETR mode, in-sync - * 0 1 x 1 1 1 0 0 both ports online, port 1 usable - * 0 1 x 1 1 1 1 0 both ports online, port 1 usable, PPS mode, in-sync - * 0 1 x 1 1 1 1 1 both ports online, port 1 usable, ETR mode, in-sync - * 1 0 x 1 0 1 0 0 port 0 online and usable, ETR or PPS mode - * 1 0 x 1 0 1 0 1 port 0 online, usable and ETR mode - * 1 0 x 1 0 1 1 0 port 0 online, usable, PPS mode, in-sync - * 1 0 x 1 0 1 1 1 port 0 online, usable, ETR mode, in-sync - * 1 0 x 1 1 1 0 0 both ports online, port 0 usable - * 1 0 x 1 1 1 1 0 both ports online, port 0 usable, PPS mode, in-sync - * 1 0 x 1 1 1 1 1 both ports online, port 0 usable, ETR mode, in-sync - * 1 1 x 1 1 1 1 0 both ports online & usable, ETR, in-sync - * 1 1 x 1 1 1 1 1 both ports online & usable, ETR, in-sync - */ -static struct etr_eacr etr_eacr; -static u64 etr_tolec; /* time of last eacr update */ -static struct etr_aib etr_port0; -static int etr_port0_uptodate; -static struct etr_aib etr_port1; -static int etr_port1_uptodate; -static unsigned long etr_events; -static struct timer_list etr_timer; - -static void etr_timeout(unsigned long dummy); -static void etr_work_fn(struct work_struct *work); -static DEFINE_MUTEX(etr_work_mutex); -static DECLARE_WORK(etr_work, etr_work_fn); - -/* - * Reset ETR attachment. - */ -static void etr_reset(void) -{ - etr_eacr = (struct etr_eacr) { - .e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0, - .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0, - .es = 0, .sl = 0 }; - if (etr_setr(&etr_eacr) == 0) { - etr_tolec = get_tod_clock(); - set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags); - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - } else if (etr_port0_online || etr_port1_online) { - pr_warn("The real or virtual hardware system does not provide an ETR interface\n"); - etr_port0_online = etr_port1_online = 0; - } -} - -static int __init etr_init(void) -{ - struct etr_aib aib; - - if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) - return 0; - time_init_wq(); - /* Check if this machine has the steai instruction. 
*/ - if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0) - etr_steai_available = 1; - setup_timer(&etr_timer, etr_timeout, 0UL); - if (etr_port0_online) { - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } - if (etr_port1_online) { - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } - return 0; -} - -arch_initcall(etr_init); - -/* - * Two sorts of ETR machine checks. The architecture reads: - * "When a machine-check interruption occurs and if a switch-to-local or - * ETR-sync-check interrupt request is pending but disabled, this pending - * disabled interruption request is indicated and is cleared". - * Which means that we can get etr_switch_to_local events from the machine - * check handler although the interruption condition is disabled. Lovely.. - */ - -/* - * Switch to local machine check. This is called when the last usable - * ETR port goes inactive. After switch to local the clock is not in sync. - */ -int etr_switch_to_local(void) -{ - if (!etr_eacr.sl) - return 0; - disable_sync_clock(NULL); - if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) { - etr_eacr.es = etr_eacr.sl = 0; - etr_setr(&etr_eacr); - return 1; - } - return 0; -} - -/* - * ETR sync check machine check. This is called when the ETR OTE and the - * local clock OTE are farther apart than the ETR sync check tolerance. - * After an ETR sync check the clock is not in sync. The machine check - * is broadcast to all cpus at the same time. - */ -int etr_sync_check(void) -{ - if (!etr_eacr.es) - return 0; - disable_sync_clock(NULL); - if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) { - etr_eacr.es = 0; - etr_setr(&etr_eacr); - return 1; - } - return 0; -} - -void etr_queue_work(void) -{ - queue_work(time_sync_wq, &etr_work); -} - -/* - * ETR timing alert. There are two causes: - * 1) port state change, check the usability of the port - * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the - * sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3) - * or ETR-data word 4 (edf4) has changed. - */ -static void etr_timing_alert(struct etr_irq_parm *intparm) -{ - if (intparm->pc0) - /* ETR port 0 state change. */ - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - if (intparm->pc1) - /* ETR port 1 state change. */ - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - if (intparm->eai) - /* - * ETR port alert on either port 0, 1 or both. - * Both ports are not up-to-date now. - */ - set_bit(ETR_EVENT_PORT_ALERT, &etr_events); - queue_work(time_sync_wq, &etr_work); -} - -static void etr_timeout(unsigned long dummy) -{ - set_bit(ETR_EVENT_UPDATE, &etr_events); - queue_work(time_sync_wq, &etr_work); -} - -/* - * Check if the etr mode is pps. - */ -static inline int etr_mode_is_pps(struct etr_eacr eacr) -{ - return eacr.es && !eacr.sl; -} - -/* - * Check if the etr mode is etr. - */ -static inline int etr_mode_is_etr(struct etr_eacr eacr) -{ - return eacr.es && eacr.sl; -} - -/* - * Check if the port can be used for TOD synchronization. - * For PPS mode the port has to receive OTEs. For ETR mode - * the port has to receive OTEs, the ETR stepping bit has to - * be zero and the validity bits for data frame 1, 2, and 3 - * have to be 1. - */ -static int etr_port_valid(struct etr_aib *aib, int port) -{ - unsigned int psc; - - /* Check that this port is receiving OTEs. */ - if (aib->tsp == 0) - return 0; - - psc = port ?
aib->esw.psc1 : aib->esw.psc0; - if (psc == etr_lpsc_pps_mode) - return 1; - if (psc == etr_lpsc_operational_step) - return !aib->esw.y && aib->slsw.v1 && - aib->slsw.v2 && aib->slsw.v3; - return 0; -} - -/* - * Check if two ports are on the same network. - */ -static int etr_compare_network(struct etr_aib *aib1, struct etr_aib *aib2) -{ - // FIXME: any other fields we have to compare? - return aib1->edf1.net_id == aib2->edf1.net_id; -} - -/* - * Wrapper for etr_steai that converts physical port states - * to logical port states to be consistent with the output - * of stetr (see etr_psc vs. etr_lpsc). - */ -static void etr_steai_cv(struct etr_aib *aib, unsigned int func) -{ - BUG_ON(etr_steai(aib, func) != 0); - /* Convert port state to logical port state. */ - if (aib->esw.psc0 == 1) - aib->esw.psc0 = 2; - else if (aib->esw.psc0 == 0 && aib->esw.p == 0) - aib->esw.psc0 = 1; - if (aib->esw.psc1 == 1) - aib->esw.psc1 = 2; - else if (aib->esw.psc1 == 0 && aib->esw.p == 1) - aib->esw.psc1 = 1; -} - -/* - * Check if the aib a2 is still connected to the same attachment as - * aib a1, the etv values differ by one and a2 is valid. - */ -static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p) -{ - int state_a1, state_a2; - - /* Paranoia check: e0/e1 should better be the same. */ - if (a1->esw.eacr.e0 != a2->esw.eacr.e0 || - a1->esw.eacr.e1 != a2->esw.eacr.e1) - return 0; - - /* Still connected to the same etr ? */ - state_a1 = p ? a1->esw.psc1 : a1->esw.psc0; - state_a2 = p ? a2->esw.psc1 : a2->esw.psc0; - if (state_a1 == etr_lpsc_operational_step) { - if (state_a2 != etr_lpsc_operational_step || - a1->edf1.net_id != a2->edf1.net_id || - a1->edf1.etr_id != a2->edf1.etr_id || - a1->edf1.etr_pn != a2->edf1.etr_pn) - return 0; - } else if (state_a2 != etr_lpsc_pps_mode) - return 0; - - /* The ETV value of a2 needs to be ETV of a1 + 1. */ - if (a1->edf2.etv + 1 != a2->edf2.etv) - return 0; - - if (!etr_port_valid(a2, p)) - return 0; - - return 1; -} - struct clock_sync_data { atomic_t cpus; int in_sync; @@ -747,688 +480,6 @@ static void clock_sync_cpu(struct clock_sync_data *sync) fixup_clock_comparator(sync->fixup_cc); } -/* - * Sync the TOD clock using the port referred to by aibp. This port - * has to be enabled and the other port has to be disabled. The - * last eacr update has to be more than 1.6 seconds in the past. - */ -static int etr_sync_clock(void *data) -{ - static int first; - unsigned long long clock, old_clock, clock_delta, delay, delta; - struct clock_sync_data *etr_sync; - struct etr_aib *sync_port, *aib; - int port; - int rc; - - etr_sync = data; - - if (xchg(&first, 1) == 1) { - /* Slave */ - clock_sync_cpu(etr_sync); - return 0; - } - - /* Wait until all other cpus entered the sync function. */ - while (atomic_read(&etr_sync->cpus) != 0) - cpu_relax(); - - port = etr_sync->etr_port; - aib = etr_sync->etr_aib; - sync_port = (port == 0) ? &etr_port0 : &etr_port1; - enable_sync_clock(); - - /* Set clock to next OTE. */ - __ctl_set_bit(14, 21); - __ctl_set_bit(0, 29); - clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32; - old_clock = get_tod_clock(); - if (set_tod_clock(clock) == 0) { - __udelay(1); /* Wait for the clock to start. */ - __ctl_clear_bit(0, 29); - __ctl_clear_bit(14, 21); - etr_stetr(aib); - /* Adjust Linux timing variables.
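/*
 * Illustrative sketch (editor's addition): etr_sync_clock() above, and
 * stp_sync_clock() further down, run on every CPU under stop_machine();
 * one CPU wins an xchg() race and performs the clock adjustment while
 * all the others park in clock_sync_cpu(). A portable model of that
 * rendezvous, with hypothetical names and C11 atomics standing in for
 * the kernel's xchg():
 */
#include <stdatomic.h>

static atomic_int first;

static int sync_machine(void (*adjust_clock)(void),
			void (*wait_for_sync)(void))
{
	if (atomic_exchange(&first, 1) == 1) {
		wait_for_sync();	/* every other CPU just waits */
		return 0;
	}
	adjust_clock();			/* exactly one CPU steps the clock */
	atomic_store(&first, 0);	/* re-arm for the next sync */
	return 0;
}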
*/ - delay = (unsigned long long) - (aib->edf2.etv - sync_port->edf2.etv) << 32; - delta = adjust_time(old_clock, clock, delay); - clock_delta = clock - old_clock; - atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, - &clock_delta); - etr_sync->fixup_cc = delta; - fixup_clock_comparator(delta); - /* Verify that the clock is properly set. */ - if (!etr_aib_follows(sync_port, aib, port)) { - /* Didn't work. */ - disable_sync_clock(NULL); - etr_sync->in_sync = -EAGAIN; - rc = -EAGAIN; - } else { - etr_sync->in_sync = 1; - rc = 0; - } - } else { - /* Could not set the clock ?!? */ - __ctl_clear_bit(0, 29); - __ctl_clear_bit(14, 21); - disable_sync_clock(NULL); - etr_sync->in_sync = -EAGAIN; - rc = -EAGAIN; - } - xchg(&first, 0); - return rc; -} - -static int etr_sync_clock_stop(struct etr_aib *aib, int port) -{ - struct clock_sync_data etr_sync; - struct etr_aib *sync_port; - int follows; - int rc; - - /* Check if the current aib is adjacent to the sync port aib. */ - sync_port = (port == 0) ? &etr_port0 : &etr_port1; - follows = etr_aib_follows(sync_port, aib, port); - memcpy(sync_port, aib, sizeof(*aib)); - if (!follows) - return -EAGAIN; - memset(&etr_sync, 0, sizeof(etr_sync)); - etr_sync.etr_aib = aib; - etr_sync.etr_port = port; - get_online_cpus(); - atomic_set(&etr_sync.cpus, num_online_cpus() - 1); - rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask); - put_online_cpus(); - return rc; -} - -/* - * Handle the immediate effects of the different events. - * The port change event is used for online/offline changes. - */ -static struct etr_eacr etr_handle_events(struct etr_eacr eacr) -{ - if (test_and_clear_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) - eacr.es = 0; - if (test_and_clear_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) - eacr.es = eacr.sl = 0; - if (test_and_clear_bit(ETR_EVENT_PORT_ALERT, &etr_events)) - etr_port0_uptodate = etr_port1_uptodate = 0; - - if (test_and_clear_bit(ETR_EVENT_PORT0_CHANGE, &etr_events)) { - if (eacr.e0) - /* - * Port change of an enabled port. We have to - * assume that this can have caused a stepping - * port switch. - */ - etr_tolec = get_tod_clock(); - eacr.p0 = etr_port0_online; - if (!eacr.p0) - eacr.e0 = 0; - etr_port0_uptodate = 0; - } - if (test_and_clear_bit(ETR_EVENT_PORT1_CHANGE, &etr_events)) { - if (eacr.e1) - /* - * Port change of an enabled port. We have to - * assume that this can have caused a stepping - * port switch. - */ - etr_tolec = get_tod_clock(); - eacr.p1 = etr_port1_online; - if (!eacr.p1) - eacr.e1 = 0; - etr_port1_uptodate = 0; - } - clear_bit(ETR_EVENT_UPDATE, &etr_events); - return eacr; -} - -/* - * Set up a timer that expires at etr_tolec + 1.6 seconds if - * one of the ports needs an update. - */ -static void etr_set_tolec_timeout(unsigned long long now) -{ - unsigned long micros; - - if ((!etr_eacr.p0 || etr_port0_uptodate) && - (!etr_eacr.p1 || etr_port1_uptodate)) - return; - micros = (now > etr_tolec) ? ((now - etr_tolec) >> 12) : 0; - micros = (micros > 1600000) ? 0 : 1600000 - micros; - mod_timer(&etr_timer, jiffies + (micros * HZ) / 1000000 + 1); -} - -/* - * Set up a timer that expires after 1/2 second. - */ -static void etr_set_sync_timeout(void) -{ - mod_timer(&etr_timer, jiffies + HZ/2); -} - -/* - * Update the aib information for one or both ports. - */ -static struct etr_eacr etr_handle_update(struct etr_aib *aib, - struct etr_eacr eacr) -{ - /* With both ports disabled the aib information is useless.
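/*
 * Editor's note, a worked form of the timeout arithmetic in
 * etr_set_tolec_timeout() above: the s390 TOD clock steps at bit 51,
 * i.e. 2^12 TOD units per microsecond, so (now - etr_tolec) >> 12 is
 * the elapsed time in microseconds and the timer is armed for whatever
 * is left of the 1.6 s (1600000 us) window. Self-contained sketch,
 * hypothetical name:
 */
static unsigned long tolec_micros_remaining(unsigned long long now,
					    unsigned long long tolec)
{
	unsigned long micros;

	/* TOD units -> microseconds: divide by 4096 */
	micros = (now > tolec) ? (unsigned long)((now - tolec) >> 12) : 0;
	/* remainder of the 1.6 second window, 0 if already expired */
	return (micros > 1600000) ? 0 : 1600000 - micros;
}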
*/ - if (!eacr.e0 && !eacr.e1) - return eacr; - - /* Update port0 or port1 with aib stored in etr_work_fn. */ - if (aib->esw.q == 0) { - /* Information for port 0 stored. */ - if (eacr.p0 && !etr_port0_uptodate) { - etr_port0 = *aib; - if (etr_port0_online) - etr_port0_uptodate = 1; - } - } else { - /* Information for port 1 stored. */ - if (eacr.p1 && !etr_port1_uptodate) { - etr_port1 = *aib; - if (etr_port0_online) - etr_port1_uptodate = 1; - } - } - - /* - * Do not try to get the alternate port aib if the clock - * is not in sync yet. - */ - if (!eacr.es || !check_sync_clock()) - return eacr; - - /* - * If steai is available we can get the information about - * the other port immediately. If only stetr is available the - * data-port bit toggle has to be used. - */ - if (etr_steai_available) { - if (eacr.p0 && !etr_port0_uptodate) { - etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0); - etr_port0_uptodate = 1; - } - if (eacr.p1 && !etr_port1_uptodate) { - etr_steai_cv(&etr_port1, ETR_STEAI_PORT_1); - etr_port1_uptodate = 1; - } - } else { - /* - * One port was updated above, if the other - * port is not uptodate toggle dp bit. - */ - if ((eacr.p0 && !etr_port0_uptodate) || - (eacr.p1 && !etr_port1_uptodate)) - eacr.dp ^= 1; - else - eacr.dp = 0; - } - return eacr; -} - -/* - * Write new etr control register if it differs from the current one. - * Refresh etr_tolec if the update may have moved the data port as well. - */ -static void etr_update_eacr(struct etr_eacr eacr) -{ - int dp_changed; - - if (memcmp(&etr_eacr, &eacr, sizeof(eacr)) == 0) - /* No change, return. */ - return; - /* - * The disable of an active port or the change of the data port - * bit can/will cause a change in the data port. - */ - dp_changed = etr_eacr.e0 > eacr.e0 || etr_eacr.e1 > eacr.e1 || - (etr_eacr.dp ^ eacr.dp) != 0; - etr_eacr = eacr; - etr_setr(&etr_eacr); - if (dp_changed) - etr_tolec = get_tod_clock(); -} - -/* - * ETR work. In this function you'll find the main logic. In - * particular this is the only function that calls etr_update_eacr(), - * it "controls" the etr control register. - */ -static void etr_work_fn(struct work_struct *work) -{ - unsigned long long now; - struct etr_eacr eacr; - struct etr_aib aib; - int sync_port; - - /* prevent multiple execution. */ - mutex_lock(&etr_work_mutex); - - /* Create working copy of etr_eacr. */ - eacr = etr_eacr; - - /* Check for the different events and their immediate effects. */ - eacr = etr_handle_events(eacr); - - /* Check if ETR is supposed to be active. */ - eacr.ea = eacr.p0 || eacr.p1; - if (!eacr.ea) { - /* Both ports offline. Reset everything. */ - eacr.dp = eacr.es = eacr.sl = 0; - on_each_cpu(disable_sync_clock, NULL, 1); - del_timer_sync(&etr_timer); - etr_update_eacr(eacr); - goto out_unlock; - } - - /* Store aib to get the current ETR status word. */ - BUG_ON(etr_stetr(&aib) != 0); - etr_port0.esw = etr_port1.esw = aib.esw; /* Copy status word. */ - now = get_tod_clock(); - - /* - * Update the port information if the last stepping port change - * or data port change is older than 1.6 seconds. - */ - if (now >= etr_tolec + (1600000 << 12)) - eacr = etr_handle_update(&aib, eacr); - - /* - * Select ports to enable. The preferred synchronization mode is PPS. - * Whether a port can be enabled depends on a number of things: - * 1) The port needs to be online and uptodate. A port is not - * disabled just because it is not uptodate, but it is only - * enabled if it is uptodate. - * 2) The port needs to have the same mode (pps / etr).
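/*
 * Illustrative sketch (editor's addition): etr_update_eacr() above uses
 * a write-only-on-change pattern for a hardware control register:
 * compare the cached copy with the wanted value, write only if they
 * differ, and record side effects (here, a possible data-port move)
 * for later timeout bookkeeping. Minimal model, hypothetical names:
 */
#include <stdbool.h>
#include <string.h>

struct ctl_reg { unsigned int e0 : 1, e1 : 1, dp : 1; };

static struct ctl_reg cached;

static bool ctl_reg_update(struct ctl_reg new,
			   void (*write_hw)(const struct ctl_reg *))
{
	bool dp_changed;

	if (memcmp(&cached, &new, sizeof(new)) == 0)
		return false;		/* no change, skip the write */
	/* disabling an enabled port or toggling dp moves the data port */
	dp_changed = cached.e0 > new.e0 || cached.e1 > new.e1 ||
		     (cached.dp ^ new.dp) != 0;
	cached = new;
	write_hw(&cached);
	return dp_changed;
}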
- * 3) The port needs to be usable -> etr_port_valid() == 1 - * 4) To enable the second port the clock needs to be in sync. - * 5) If both ports are usable and are ETR ports, the network id - * has to be the same. - * The eacr.sl bit is used to indicate etr mode vs. pps mode. - */ - if (eacr.p0 && aib.esw.psc0 == etr_lpsc_pps_mode) { - eacr.sl = 0; - eacr.e0 = 1; - if (!etr_mode_is_pps(etr_eacr)) - eacr.es = 0; - if (!eacr.es || !eacr.p1 || aib.esw.psc1 != etr_lpsc_pps_mode) - eacr.e1 = 0; - // FIXME: uptodate checks ? - else if (etr_port0_uptodate && etr_port1_uptodate) - eacr.e1 = 1; - sync_port = (etr_port0_uptodate && - etr_port_valid(&etr_port0, 0)) ? 0 : -1; - } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) { - eacr.sl = 0; - eacr.e0 = 0; - eacr.e1 = 1; - if (!etr_mode_is_pps(etr_eacr)) - eacr.es = 0; - sync_port = (etr_port1_uptodate && - etr_port_valid(&etr_port1, 1)) ? 1 : -1; - } else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) { - eacr.sl = 1; - eacr.e0 = 1; - if (!etr_mode_is_etr(etr_eacr)) - eacr.es = 0; - if (!eacr.es || !eacr.p1 || - aib.esw.psc1 != etr_lpsc_operational_alt) - eacr.e1 = 0; - else if (etr_port0_uptodate && etr_port1_uptodate && - etr_compare_network(&etr_port0, &etr_port1)) - eacr.e1 = 1; - sync_port = (etr_port0_uptodate && - etr_port_valid(&etr_port0, 0)) ? 0 : -1; - } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) { - eacr.sl = 1; - eacr.e0 = 0; - eacr.e1 = 1; - if (!etr_mode_is_etr(etr_eacr)) - eacr.es = 0; - sync_port = (etr_port1_uptodate && - etr_port_valid(&etr_port1, 1)) ? 1 : -1; - } else { - /* Both ports not usable. */ - eacr.es = eacr.sl = 0; - sync_port = -1; - } - - /* - * If the clock is in sync just update the eacr and return. - * If there is no valid sync port wait for a port update. - */ - if ((eacr.es && check_sync_clock()) || sync_port < 0) { - etr_update_eacr(eacr); - etr_set_tolec_timeout(now); - goto out_unlock; - } - - /* - * Prepare control register for clock syncing - * (reset data port bit, set sync check control). - */ - eacr.dp = 0; - eacr.es = 1; - - /* - * Update eacr and try to synchronize the clock. If the update - * of eacr caused a stepping port switch (or if we have to - * assume that a stepping port switch has occurred) or the - * clock syncing failed, reset the sync check control bit - * and set up a timer to try again after 0.5 seconds. - */ - etr_update_eacr(eacr); - if (now < etr_tolec + (1600000 << 12) || - etr_sync_clock_stop(&aib, sync_port) != 0) { - /* Sync failed. Try again in 1/2 second.
*/ - eacr.es = 0; - etr_update_eacr(eacr); - etr_set_sync_timeout(); - } else - etr_set_tolec_timeout(now); -out_unlock: - mutex_unlock(&etr_work_mutex); -} - -/* - * Sysfs interface functions - */ -static struct bus_type etr_subsys = { - .name = "etr", - .dev_name = "etr", -}; - -static struct device etr_port0_dev = { - .id = 0, - .bus = &etr_subsys, -}; - -static struct device etr_port1_dev = { - .id = 1, - .bus = &etr_subsys, -}; - -/* - * ETR subsys attributes - */ -static ssize_t etr_stepping_port_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%i\n", etr_port0.esw.p); -} - -static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); - -static ssize_t etr_stepping_mode_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - char *mode_str; - - if (etr_mode_is_pps(etr_eacr)) - mode_str = "pps"; - else if (etr_mode_is_etr(etr_eacr)) - mode_str = "etr"; - else - mode_str = "local"; - return sprintf(buf, "%s\n", mode_str); -} - -static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); - -/* - * ETR port attributes - */ -static inline struct etr_aib *etr_aib_from_dev(struct device *dev) -{ - if (dev == &etr_port0_dev) - return etr_port0_online ? &etr_port0 : NULL; - else - return etr_port1_online ? &etr_port1 : NULL; -} - -static ssize_t etr_online_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - unsigned int online; - - online = (dev == &etr_port0_dev) ? etr_port0_online : etr_port1_online; - return sprintf(buf, "%i\n", online); -} - -static ssize_t etr_online_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - unsigned int value; - - value = simple_strtoul(buf, NULL, 0); - if (value != 0 && value != 1) - return -EINVAL; - if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) - return -EOPNOTSUPP; - mutex_lock(&clock_sync_mutex); - if (dev == &etr_port0_dev) { - if (etr_port0_online == value) - goto out; /* Nothing to do. */ - etr_port0_online = value; - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - else - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } else { - if (etr_port1_online == value) - goto out; /* Nothing to do. */ - etr_port1_online = value; - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - else - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } -out: - mutex_unlock(&clock_sync_mutex); - return count; -} - -static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store); - -static ssize_t etr_stepping_control_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? - etr_eacr.e0 : etr_eacr.e1); -} - -static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); - -static ssize_t etr_mode_code_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - if (!etr_port0_online && !etr_port1_online) - /* Status word is not uptodate if both ports are offline. */ - return -ENODATA; - return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? 
- etr_port0.esw.psc0 : etr_port0.esw.psc1); -} - -static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL); - -static ssize_t etr_untuned_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.u); -} - -static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL); - -static ssize_t etr_network_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.net_id); -} - -static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL); - -static ssize_t etr_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.etr_id); -} - -static DEVICE_ATTR(id, 0400, etr_id_show, NULL); - -static ssize_t etr_port_number_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.etr_pn); -} - -static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL); - -static ssize_t etr_coupled_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.c); -} - -static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL); - -static ssize_t etr_local_time_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.blto); -} - -static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL); - -static ssize_t etr_utc_offset_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.buo); -} - -static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); - -static struct device_attribute *etr_port_attributes[] = { - &dev_attr_online, - &dev_attr_stepping_control, - &dev_attr_state_code, - &dev_attr_untuned, - &dev_attr_network, - &dev_attr_id, - &dev_attr_port, - &dev_attr_coupled, - &dev_attr_local_time, - &dev_attr_utc_offset, - NULL -}; - -static int __init etr_register_port(struct device *dev) -{ - struct device_attribute **attr; - int rc; - - rc = device_register(dev); - if (rc) - goto out; - for (attr = etr_port_attributes; *attr; attr++) { - rc = device_create_file(dev, *attr); - if (rc) - goto out_unreg; - } - return 0; -out_unreg: - for (; attr >= etr_port_attributes; attr--) - device_remove_file(dev, *attr); - device_unregister(dev); -out: - return rc; -} - -static void __init etr_unregister_port(struct device *dev) -{ - struct device_attribute **attr; - - for (attr = etr_port_attributes; *attr; attr++) - device_remove_file(dev, *attr); - device_unregister(dev); -} - -static int __init etr_init_sysfs(void) -{ - int rc; - - rc = subsys_system_register(&etr_subsys, NULL); - if (rc) - goto out; - rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port); - if (rc) - goto out_unreg_subsys; - rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode); - 
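/*
 * Illustrative sketch (editor's addition): etr_register_port() above
 * uses the common create-then-unwind idiom: create the attribute files
 * in order and, on failure, remove the ones already created in reverse
 * order before unregistering the device. Generic model, all names
 * hypothetical:
 */
static int register_with_attrs(void *dev, int nattrs,
			       int (*create)(void *dev, int attr),
			       void (*remove)(void *dev, int attr))
{
	int i, rc;

	for (i = 0; i < nattrs; i++) {
		rc = create(dev, i);
		if (rc) {
			while (--i >= 0)	/* unwind in reverse order */
				remove(dev, i);
			return rc;
		}
	}
	return 0;
}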
if (rc) - goto out_remove_stepping_port; - rc = etr_register_port(&etr_port0_dev); - if (rc) - goto out_remove_stepping_mode; - rc = etr_register_port(&etr_port1_dev); - if (rc) - goto out_remove_port0; - return 0; - -out_remove_port0: - etr_unregister_port(&etr_port0_dev); -out_remove_stepping_mode: - device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode); -out_remove_stepping_port: - device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port); -out_unreg_subsys: - bus_unregister(&etr_subsys); -out: - return rc; -} - -device_initcall(etr_init_sysfs); - /* * Server Time Protocol (STP) code. */ @@ -1455,7 +506,7 @@ static void __init stp_reset(void) int rc; stp_page = (void *) get_zeroed_page(GFP_ATOMIC); - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { @@ -1533,6 +584,7 @@ static int stp_sync_clock(void *data) static int first; unsigned long long old_clock, delta, new_clock, clock_delta; struct clock_sync_data *stp_sync; + struct ptff_qto qto; int rc; stp_sync = data; @@ -1554,11 +606,14 @@ static int stp_sync_clock(void *data) stp_info.todoff[2] || stp_info.todoff[3] || stp_info.tmd != 2) { old_clock = get_tod_clock(); - rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0); + rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta); if (rc == 0) { - new_clock = get_tod_clock(); + new_clock = old_clock + clock_delta; delta = adjust_time(old_clock, new_clock, 0); - clock_delta = new_clock - old_clock; + if (ptff_query(PTFF_QTO) && + ptff(&qto, sizeof(qto), PTFF_QTO) == 0) + /* Update LPAR offset */ + lpar_offset = qto.tod_epoch_difference; atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &clock_delta); fixup_clock_comparator(delta); @@ -1590,12 +645,12 @@ static void stp_work_fn(struct work_struct *work) mutex_lock(&stp_work_mutex); if (!stp_online) { - chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); del_timer_sync(&stp_timer); goto out_unlock; } - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL); if (rc) goto out_unlock; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 64298a867589..e959c02e0cac 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -46,6 +46,7 @@ static DECLARE_WORK(topology_work, topology_work_fn); */ static struct mask_info socket_info; static struct mask_info book_info; +static struct mask_info drawer_info; DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology); EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology); @@ -79,10 +80,10 @@ static cpumask_t cpu_thread_map(unsigned int cpu) return mask; } -static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, - struct mask_info *book, - struct mask_info *socket, - int one_socket_per_cpu) +static void add_cpus_to_mask(struct topology_core *tl_core, + struct mask_info *drawer, + struct mask_info *book, + struct mask_info *socket) { struct cpu_topology_s390 *topo; unsigned int core; @@ -97,21 +98,17 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, continue; for (i = 0; i <= smp_cpu_mtid; i++) { topo = &per_cpu(cpu_topology, lcpu + i); + topo->drawer_id = drawer->id; topo->book_id = book->id; + topo->socket_id = socket->id; topo->core_id = rcore; topo->thread_id = lcpu + i; + cpumask_set_cpu(lcpu + i, &drawer->mask); cpumask_set_cpu(lcpu + i, &book->mask); cpumask_set_cpu(lcpu + i, &socket->mask); - 
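/*
 * Editor's note on the STP hunks above (sketch, hypothetical name): the
 * sync path no longer re-reads the TOD clock; it applies the delta
 * reported by chsc_sstpc(), i.e. new_clock = old_clock + clock_delta,
 * and refreshes lpar_offset from the PTFF query-TOD-offset function
 * (PTFF_QTO) when the machine supports it. Everything keyed to the
 * clock is then stepped by the same delta, e.g. the clock comparator:
 */
static unsigned long long step_deadline(unsigned long long deadline,
					unsigned long long clock_delta)
{
	/* a stepped clock moves every pending deadline by the same delta */
	return deadline + clock_delta;
}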
if (one_socket_per_cpu) - topo->socket_id = rcore; - else - topo->socket_id = socket->id; smp_cpu_set_polarization(lcpu + i, tl_core->pp); } - if (one_socket_per_cpu) - socket = socket->next; } - return socket; } static void clear_masks(void) @@ -128,6 +125,11 @@ static void clear_masks(void) cpumask_clear(&info->mask); info = info->next; } + info = &drawer_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } } static union topology_entry *next_tle(union topology_entry *tle) @@ -137,16 +139,22 @@ static union topology_entry *next_tle(union topology_entry *tle) return (union topology_entry *)((struct topology_container *)tle + 1); } -static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) +static void tl_to_masks(struct sysinfo_15_1_x *info) { struct mask_info *socket = &socket_info; struct mask_info *book = &book_info; + struct mask_info *drawer = &drawer_info; union topology_entry *tle, *end; + clear_masks(); tle = info->tle; end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { + case 3: + drawer = drawer->next; + drawer->id = tle->container.id; + break; case 2: book = book->next; book->id = tle->container.id; @@ -156,32 +164,7 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) socket->id = tle->container.id; break; case 0: - add_cpus_to_mask(&tle->cpu, book, socket, 0); - break; - default: - clear_masks(); - return; - } - tle = next_tle(tle); - } -} - -static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) -{ - struct mask_info *socket = &socket_info; - struct mask_info *book = &book_info; - union topology_entry *tle, *end; - - tle = info->tle; - end = (union topology_entry *)((unsigned long)info + info->length); - while (tle < end) { - switch (tle->nl) { - case 1: - book = book->next; - book->id = tle->container.id; - break; - case 0: - socket = add_cpus_to_mask(&tle->cpu, book, socket, 1); + add_cpus_to_mask(&tle->cpu, drawer, book, socket); break; default: clear_masks(); @@ -191,22 +174,6 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) } } -static void tl_to_masks(struct sysinfo_15_1_x *info) -{ - struct cpuid cpu_id; - - get_cpu_id(&cpu_id); - clear_masks(); - switch (cpu_id.machine) { - case 0x2097: - case 0x2098: - __tl_to_masks_z10(info); - break; - default: - __tl_to_masks_generic(info); - } -} - static void topology_update_polarization_simple(void) { int cpu; @@ -257,11 +224,13 @@ static void update_cpu_masks(void) topo->thread_mask = cpu_thread_map(cpu); topo->core_mask = cpu_group_map(&socket_info, cpu); topo->book_mask = cpu_group_map(&book_info, cpu); + topo->drawer_mask = cpu_group_map(&drawer_info, cpu); if (!MACHINE_HAS_TOPOLOGY) { topo->thread_id = cpu; topo->core_id = cpu; topo->socket_id = cpu; topo->book_id = cpu; + topo->drawer_id = cpu; } } numa_update_cpu_topology(); @@ -269,10 +238,7 @@ static void update_cpu_masks(void) void store_topology(struct sysinfo_15_1_x *info) { - if (topology_max_mnest >= 3) - stsi(info, 15, 1, 3); - else - stsi(info, 15, 1, 2); + stsi(info, 15, 1, min(topology_max_mnest, 4)); } int arch_update_cpu_topology(void) @@ -442,6 +408,11 @@ static const struct cpumask *cpu_book_mask(int cpu) return &per_cpu(cpu_topology, cpu).book_mask; } +static const struct cpumask *cpu_drawer_mask(int cpu) +{ + return &per_cpu(cpu_topology, cpu).drawer_mask; +} + static int __init early_parse_topology(char *p) { return kstrtobool(p, &topology_enabled); @@ -452,6 +423,7 @@ static struct sched_domain_topology_level s390_topology[] = { { 
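/*
 * Editor's note: with the drawer level introduced above, the s390
 * scheduling topology nests, smallest grouping first:
 *
 *	SMT (threads) < MC (socket) < BOOK < DRAWER < DIE
 *
 * matching the container nesting levels 0..3 that tl_to_masks() parses
 * from SYSIB 15.1.x, which store_topology() now requests with
 * stsi(info, 15, 1, min(topology_max_mnest, 4)).
 */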
cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, { cpu_book_mask, SD_INIT_NAME(BOOK) }, + { cpu_drawer_mask, SD_INIT_NAME(DRAWER) }, { cpu_cpu_mask, SD_INIT_NAME(DIE) }, { NULL, }, }; @@ -487,6 +459,7 @@ static int __init s390_topology_init(void) printk(KERN_CONT " / %d\n", info->mnest); alloc_masks(info, &socket_info, 1); alloc_masks(info, &book_info, 2); + alloc_masks(info, &drawer_info, 3); set_sched_topology(s390_topology); return 0; } diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index f9c459586649..68145456fee2 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -1,5 +1,7 @@ # List of files in the vdso, has to be asm only for now +KCOV_INSTRUMENT := n + obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o # Build rules diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 058659c1b8cf..0b0fd22c869a 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -1,5 +1,7 @@ # List of files in the vdso, has to be asm only for now +KCOV_INSTRUMENT := n + obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o # Build rules diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 0f41a8286378..429bfd111961 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -4,6 +4,16 @@ #include #include + +/* + * Put .bss..swapper_pg_dir as the first thing in .bss. This will + * make sure it has 16k alignment. + */ +#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) + +/* Handle ro_after_init data on our own. */ +#define RO_AFTER_INIT_DATA + #include OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") @@ -49,7 +59,14 @@ SECTIONS _eshared = .; /* End of shareable data */ _sdata = .; /* Start of data section */ - EXCEPTION_TABLE(16) :data + . = ALIGN(PAGE_SIZE); + __start_ro_after_init = .; + .data..ro_after_init : { + *(.data..ro_after_init) + } + EXCEPTION_TABLE(16) + . = ALIGN(PAGE_SIZE); + __end_ro_after_init = .; RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE) @@ -81,7 +98,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ - BSS_SECTION(0, 2, 0) + BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE) _end = . 
; diff --git a/arch/s390/kvm/kvm-s390.c index 43f2a2b80490..6f5c344cd785 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index b647d5ff0ad9..e390bbb16443 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -236,6 +236,26 @@ char * strrchr(const char * s, int c) } EXPORT_SYMBOL(strrchr); +static inline int clcle(const char *s1, unsigned long l1, + const char *s2, unsigned long l2, + int *diff) +{ + register unsigned long r2 asm("2") = (unsigned long) s1; + register unsigned long r3 asm("3") = (unsigned long) l1; + register unsigned long r4 asm("4") = (unsigned long) s2; + register unsigned long r5 asm("5") = (unsigned long) l2; + int cc; + + asm volatile ("0: clcle %1,%3,0\n" + " jo 0b\n" + " ipm %0\n" + " srl %0,28" + : "=&d" (cc), "+a" (r2), "+a" (r3), + "+a" (r4), "+a" (r5) : : "cc"); + *diff = *(char *)r2 - *(char *)r4; + return cc; +} + /** * strstr - Find the first substring in a %NUL terminated string * @s1: The string to be searched * @s2: The string to search for */ @@ -250,18 +270,9 @@ char * strstr(const char * s1,const char * s2) return (char *) s1; l1 = __strend(s1) - s1; while (l1-- >= l2) { - register unsigned long r2 asm("2") = (unsigned long) s1; - register unsigned long r3 asm("3") = (unsigned long) l2; - register unsigned long r4 asm("4") = (unsigned long) s2; - register unsigned long r5 asm("5") = (unsigned long) l2; - int cc; - - asm volatile ("0: clcle %1,%3,0\n" - " jo 0b\n" - " ipm %0\n" - " srl %0,28" - : "=&d" (cc), "+a" (r2), "+a" (r3), - "+a" (r4), "+a" (r5) : : "cc" ); + int cc, dummy; + + cc = clcle(s1, l2, s2, l2, &dummy); if (!cc) return (char *) s1; s1++; @@ -302,20 +313,11 @@ EXPORT_SYMBOL(memchr); */ int memcmp(const void *cs, const void *ct, size_t n) { - register unsigned long r2 asm("2") = (unsigned long) cs; - register unsigned long r3 asm("3") = (unsigned long) n; - register unsigned long r4 asm("4") = (unsigned long) ct; - register unsigned long r5 asm("5") = (unsigned long) n; - int ret; + int ret, diff; - asm volatile ("0: clcle %1,%3,0\n" - " jo 0b\n" - " ipm %0\n" - " srl %0,28" - : "=&d" (ret), "+a" (r2), "+a" (r3), "+a" (r4), "+a" (r5) - : : "cc" ); + ret = clcle(cs, n, ct, n, &diff); if (ret) - ret = *(char *) r2 - *(char *) r4; + ret = diff; return ret; } EXPORT_SYMBOL(memcmp); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index ae4de559e3a0..d96596128e9f 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -49,7 +49,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr " jnm 5b\n" " ex %4,0(%3)\n" " j 8f\n" - "7:slgr %0,%0\n" + "7: slgr %0,%0\n" "8:\n" EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) @@ -93,7 +93,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, " jnm 6b\n" " ex %4,0(%3)\n" " j 9f\n" - "8:slgr %0,%0\n" + "8: slgr %0,%0\n" "9: sacf 768\n" EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b) EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b) @@ -266,7 +266,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" " slgr %0,%3\n" " j 5f\n" - "4:slgr %0,%0\n" + "4: slgr %0,%0\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) diff --git
a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 8556d6be9b54..861880df12c7 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pud = pud_offset(pgd, addr); if (!pud_none(*pud)) if (pud_large(*pud)) { - prot = pud_val(*pud) & _REGION3_ENTRY_RO; + prot = pud_val(*pud) & _REGION_ENTRY_PROTECT; note_page(m, st, prot, 2); } else walk_pmd_level(m, st, pud, addr); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 19288c1b36d3..25783dc3c813 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -456,7 +456,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); /* No reason to continue if interrupted by SIGKILL. */ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { fault = VM_FAULT_SIGNAL; @@ -624,7 +624,7 @@ void pfault_fini(void) diag_stat_inc(DIAG_STAT_X258); asm volatile( " diag %0,0,0x258\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : : "a" (&refbk), "m" (refbk) : "cc"); } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index cace818d86eb..063c721ec0dc 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL_GPL(gmap_alloc); static void gmap_flush_tlb(struct gmap *gmap) { if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); + __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); } @@ -124,7 +124,7 @@ void gmap_free(struct gmap *gmap) /* Flush tlb. */ if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); + __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); @@ -430,6 +430,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) VM_BUG_ON(pgd_none(*pgd)); pud = pud_offset(pgd, vmaddr); VM_BUG_ON(pud_none(*pud)); + /* large puds cannot yet be handled */ + if (pud_large(*pud)) + return -EFAULT; pmd = pmd_offset(pud, vmaddr); VM_BUG_ON(pmd_none(*pmd)); /* large pmds cannot yet be handled */ diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index a8a6765f1a51..adb0c34bf431 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -128,6 +128,44 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, return 1; } +static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + struct page *head, *page; + unsigned long mask; + int refs; + + mask = (write ? 
_REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID; + if ((pud_val(pud) & mask) != 0) + return 0; + VM_BUG_ON(!pfn_valid(pud_pfn(pud))); + + refs = 0; + head = pud_page(pud); + page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pud_val(pud) != pud_val(*pudp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + return 1; +} + static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -144,7 +182,12 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, next = pud_addr_end(addr, end); if (pud_none(pud)) return 0; - if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr)) + if (unlikely(pud_large(pud))) { + if (!gup_huge_pud(pudp, pud, addr, next, write, pages, + nr)) + return 0; + } else if (!gup_pmd_range(pudp, pud, addr, next, write, pages, + nr)) return 0; } while (pudp++, addr = next, addr != end); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 1b5e8983f4f3..e19d853883be 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -1,19 +1,22 @@ /* * IBM System z Huge TLB Page Support for Kernel. * - * Copyright IBM Corp. 2007 + * Copyright IBM Corp. 2007,2016 * Author(s): Gerald Schaefer */ +#define KMSG_COMPONENT "hugetlb" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include -static inline pmd_t __pte_to_pmd(pte_t pte) +static inline unsigned long __pte_to_rste(pte_t pte) { - pmd_t pmd; + unsigned long rste; /* - * Convert encoding pte bits pmd bits + * Convert encoding pte bits pmd / pud bits * lIR.uswrdy.p dy..R...I...wr * empty 010.000000.0 -> 00..0...1...00 * prot-none, clean, old 111.000000.1 -> 00..1...1...00 @@ -33,25 +36,31 @@ static inline pmd_t __pte_to_pmd(pte_t pte) * u unused, l large */ if (pte_present(pte)) { - pmd_val(pmd) = pte_val(pte) & PAGE_MASK; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT); - pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; + rste = pte_val(pte) & PAGE_MASK; + rste |= (pte_val(pte) & _PAGE_READ) >> 4; + rste |= (pte_val(pte) & _PAGE_WRITE) >> 4; + rste |= (pte_val(pte) & _PAGE_INVALID) >> 5; + rste |= (pte_val(pte) & _PAGE_PROTECT); + rste |= (pte_val(pte) & _PAGE_DIRTY) << 10; + rste |= (pte_val(pte) & _PAGE_YOUNG) << 10; + rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; } else - pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; - return pmd; + rste = _SEGMENT_ENTRY_INVALID; + return rste; } -static inline pte_t __pmd_to_pte(pmd_t pmd) +static inline pte_t __rste_to_pte(unsigned long rste) { + int present; pte_t pte; + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + present = pud_present(__pud(rste)); + else + present = pmd_present(__pmd(rste)); + /* - * Convert encoding pmd bits pte bits + * Convert encoding pmd / pud bits pte bits * dy..R...I...wr lIR.uswrdy.p * empty 00..0...1...00 -> 010.000000.0 * prot-none, clean, old 00..1...1...00 -> 111.000000.1 @@ -70,16 +79,16 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) * SW-bits: p 
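/*
 * Illustrative sketch (editor's addition): gup_huge_pud() above follows
 * the standard lockless GUP-fast recipe: record the pages covered by
 * the huge entry, take a speculative reference on the head page, then
 * re-read the PUD and bail out (dropping the references) if it changed
 * under us. The essential recheck, reduced to a model with hypothetical
 * names:
 */
static int pin_if_unchanged(const volatile unsigned long *entry,
			    unsigned long snapshot,
			    int (*try_grab_refs)(void),
			    void (*drop_refs)(void))
{
	if (!try_grab_refs())		/* page may already be being freed */
		return 0;
	if (*entry != snapshot) {	/* entry changed concurrently: undo */
		drop_refs();
		return 0;
	}
	return 1;
}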
present, y young, d dirty, r read, w write, s special, * u unused, l large */ - if (pmd_present(pmd)) { - pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; + if (present) { + pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT); - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT); + pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; } else pte_val(pte) = _PAGE_INVALID; return pte; @@ -88,27 +97,33 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - pmd_t pmd = __pte_to_pmd(pte); - - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - *(pmd_t *) ptep = pmd; + unsigned long rste = __pte_to_rste(pte); + + /* Set correct table type for 2G hugepages */ + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; + else + rste |= _SEGMENT_ENTRY_LARGE; + pte_val(*ptep) = rste; } pte_t huge_ptep_get(pte_t *ptep) { - pmd_t pmd = *(pmd_t *) ptep; - - return __pmd_to_pte(pmd); + return __rste_to_pte(pte_val(*ptep)); } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { + pte_t pte = huge_ptep_get(ptep); pmd_t *pmdp = (pmd_t *) ptep; - pmd_t old; + pud_t *pudp = (pud_t *) ptep; - old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); - return __pmd_to_pte(old); + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY)); + else + pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + return pte; } pte_t *huge_pte_alloc(struct mm_struct *mm, @@ -120,8 +135,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pgdp = pgd_offset(mm, addr); pudp = pud_alloc(mm, pgdp, addr); - if (pudp) - pmdp = pmd_alloc(mm, pudp, addr); + if (pudp) { + if (sz == PUD_SIZE) + return (pte_t *) pudp; + else if (sz == PMD_SIZE) + pmdp = pmd_alloc(mm, pudp, addr); + } return (pte_t *) pmdp; } @@ -134,8 +153,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pgdp = pgd_offset(mm, addr); if (pgd_present(*pgdp)) { pudp = pud_offset(pgdp, addr); - if (pud_present(*pudp)) + if (pud_present(*pudp)) { + if (pud_large(*pudp)) + return (pte_t *) pudp; pmdp = pmd_offset(pudp, addr); + } } return (pte_t *) pmdp; } @@ -147,5 +169,34 @@ int pmd_huge(pmd_t pmd) int pud_huge(pud_t pud) { - return 0; + return pud_large(pud); +} + +struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int flags) +{ + if (flags & FOLL_GET) + return NULL; + + return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long size; + char *string = opt; + + size = memparse(opt, &opt); + if (MACHINE_HAS_EDAT1 && size 
== PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + pr_err("hugepagesz= specifies an unsupported page size %s\n", + string); + return 0; + } + return 1; } +__setup("hugepagesz=", setup_hugepagesz); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 2489b2e917c8..f56a39bd8ba6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -40,7 +40,7 @@ #include #include -pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); +pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); @@ -111,17 +111,16 @@ void __init paging_init(void) void mark_rodata_ro(void) { - /* Text and rodata are already protected. Nothing to do here. */ - pr_info("Write protecting the kernel read-only data: %luk\n", - ((unsigned long)&_eshared - (unsigned long)&_stext) >> 10); + unsigned long size = __end_ro_after_init - __start_ro_after_init; + + set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT); + pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); } void __init mem_init(void) { - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - atomic_set(&init_mm.context.attach_count, 1); set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index a90d45e9dfb0..3330ea124eec 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -34,20 +34,25 @@ static int __init cmma(char *str) } __setup("cmma=", cmma); -void __init cmma_init(void) +static inline int cmma_test_essa(void) { register unsigned long tmp asm("0") = 0; register int rc asm("1") = -EOPNOTSUPP; - if (!cmma_flag) - return; asm volatile( " .insn rrf,0xb9ab0000,%1,%1,0,0\n" "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) : "+&d" (rc), "+&d" (tmp)); - if (rc) + return rc; +} + +void __init cmma_init(void) +{ + if (!cmma_flag) + return; + if (cmma_test_essa()) cmma_flag = 0; } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index f2a5c29a97e9..7104ffb5a67f 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -10,7 +10,6 @@ #include #include -#if PAGE_DEFAULT_KEY static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) { asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0" @@ -22,6 +21,8 @@ void __storage_key_init_range(unsigned long start, unsigned long end) { unsigned long boundary, size; + if (!PAGE_DEFAULT_KEY) + return; while (start < end) { if (MACHINE_HAS_EDAT1) { /* set storage keys for a 1MB frame */ @@ -38,56 +39,254 @@ void __storage_key_init_range(unsigned long start, unsigned long end) start += PAGE_SIZE; } } -#endif -static pte_t *walk_page_table(unsigned long addr) +#ifdef CONFIG_PROC_FS +atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; + +void arch_report_meminfo(struct seq_file *m) { - pgd_t *pgdp; - pud_t *pudp; + seq_printf(m, "DirectMap4k: %8lu kB\n", + atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2); + seq_printf(m, "DirectMap1M: %8lu kB\n", + atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10); + seq_printf(m, "DirectMap2G: %8lu kB\n", + atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21); +} +#endif /* CONFIG_PROC_FS */ + +static void 
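/*
 * Editor's note, a worked check of the shift counts in
 * arch_report_meminfo() above: they encode the conversion of a mapping
 * count to kB. A 4 KiB page is count << 2 kB, a 1 MiB segment is
 * count << 10 kB, and a 2 GiB region is count << 21 kB. Self-contained:
 */
#include <assert.h>

int main(void)
{
	assert((1ULL << 2)  == (4ULL << 10) >> 10);	/* 4K -> 4 kB */
	assert((1ULL << 10) == (1ULL << 20) >> 10);	/* 1M -> 1024 kB */
	assert((1ULL << 21) == (2ULL << 30) >> 10);	/* 2G -> 2097152 kB */
	return 0;
}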
pgt_set(unsigned long *old, unsigned long new, unsigned long addr, + unsigned long dtt) +{ + unsigned long table, mask; + + mask = 0; + if (MACHINE_HAS_EDAT2) { + switch (dtt) { + case CRDTE_DTT_REGION3: + mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1); + break; + case CRDTE_DTT_SEGMENT: + mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); + break; + case CRDTE_DTT_PAGE: + mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1); + break; + } + table = (unsigned long)old & mask; + crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce); + } else if (MACHINE_HAS_IDTE) { + cspg(old, *old, new); + } else { + csp((unsigned int *)old + 1, *old, new); + } +} + +struct cpa { + unsigned int set_ro : 1; + unsigned int clear_ro : 1; +}; + +static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, + struct cpa cpa) +{ + pte_t *ptep, new; + + ptep = pte_offset(pmdp, addr); + do { + if (pte_none(*ptep)) + return -EINVAL; + if (cpa.set_ro) + new = pte_wrprotect(*ptep); + else if (cpa.clear_ro) + new = pte_mkwrite(pte_mkdirty(*ptep)); + pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); + ptep++; + addr += PAGE_SIZE; + cond_resched(); + } while (addr < end); + return 0; +} + +static int split_pmd_page(pmd_t *pmdp, unsigned long addr) +{ + unsigned long pte_addr, prot; + pte_t *pt_dir, *ptep; + pmd_t new; + int i, ro; + + pt_dir = vmem_pte_alloc(); + if (!pt_dir) + return -ENOMEM; + pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT; + ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT); + prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); + ptep = pt_dir; + for (i = 0; i < PTRS_PER_PTE; i++) { + pte_val(*ptep) = pte_addr | prot; + pte_addr += PAGE_SIZE; + ptep++; + } + pmd_val(new) = __pa(pt_dir) | _SEGMENT_ENTRY; + pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); + update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE); + update_page_count(PG_DIRECT_MAP_1M, -1); + return 0; +} + +static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, struct cpa cpa) +{ + pmd_t new; + + if (cpa.set_ro) + new = pmd_wrprotect(*pmdp); + else if (cpa.clear_ro) + new = pmd_mkwrite(pmd_mkdirty(*pmdp)); + pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); +} + +static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end, + struct cpa cpa) +{ + unsigned long next; pmd_t *pmdp; - pte_t *ptep; + int rc = 0; - pgdp = pgd_offset_k(addr); - if (pgd_none(*pgdp)) - return NULL; - pudp = pud_offset(pgdp, addr); - if (pud_none(*pudp) || pud_large(*pudp)) - return NULL; pmdp = pmd_offset(pudp, addr); - if (pmd_none(*pmdp) || pmd_large(*pmdp)) - return NULL; - ptep = pte_offset_kernel(pmdp, addr); - if (pte_none(*ptep)) - return NULL; - return ptep; + do { + if (pmd_none(*pmdp)) + return -EINVAL; + next = pmd_addr_end(addr, end); + if (pmd_large(*pmdp)) { + if (addr & ~PMD_MASK || addr + PMD_SIZE > next) { + rc = split_pmd_page(pmdp, addr); + if (rc) + return rc; + continue; + } + modify_pmd_page(pmdp, addr, cpa); + } else { + rc = walk_pte_level(pmdp, addr, next, cpa); + if (rc) + return rc; + } + pmdp++; + addr = next; + cond_resched(); + } while (addr < end); + return rc; } -static void change_page_attr(unsigned long addr, int numpages, - pte_t (*set) (pte_t)) +static int split_pud_page(pud_t *pudp, unsigned long addr) { - pte_t *ptep; - int i; + unsigned long pmd_addr, prot; + pmd_t *pm_dir, *pmdp; + pud_t new; + int i, ro; - for (i = 0; i < numpages; i++) { - ptep = walk_page_table(addr); - if (WARN_ON_ONCE(!ptep)) - break; - *ptep = set(*ptep); - addr += PAGE_SIZE; + 
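/*
 * Illustrative sketch (editor's addition): split_pmd_page() above, and
 * split_pud_page() continuing below, follow the usual large-page split
 * for attribute changes: allocate the next-level table, fill it so the
 * fine-grained entries reproduce the old large mapping, then atomically
 * replace the large entry (crdte/cspg/csp in pgt_set()). The fill step,
 * reduced to a model with hypothetical names:
 */
static void fill_split_table(unsigned long *table, int nentries,
			     unsigned long base, unsigned long step,
			     unsigned long prot)
{
	int i;

	for (i = 0; i < nentries; i++) {
		/* same protection as the old large entry, smaller frames */
		table[i] = base | prot;
		base += step;
	}
}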
pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + return -ENOMEM; + pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT; + ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT); + prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL); + pmdp = pm_dir; + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd_val(*pmdp) = pmd_addr | prot; + pmd_addr += PMD_SIZE; + pmdp++; } - __tlb_flush_kernel(); + pud_val(new) = __pa(pm_dir) | _REGION3_ENTRY; + pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); + update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD); + update_page_count(PG_DIRECT_MAP_2G, -1); + return 0; +} + +static void modify_pud_page(pud_t *pudp, unsigned long addr, struct cpa cpa) +{ + pud_t new; + + if (cpa.set_ro) + new = pud_wrprotect(*pudp); + else if (cpa.clear_ro) + new = pud_mkwrite(pud_mkdirty(*pudp)); + pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); +} + +static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end, + struct cpa cpa) +{ + unsigned long next; + pud_t *pudp; + int rc = 0; + + pudp = pud_offset(pgd, addr); + do { + if (pud_none(*pudp)) + return -EINVAL; + next = pud_addr_end(addr, end); + if (pud_large(*pudp)) { + if (addr & ~PUD_MASK || addr + PUD_SIZE > next) { + rc = split_pud_page(pudp, addr); + if (rc) + break; + continue; + } + modify_pud_page(pudp, addr, cpa); + } else { + rc = walk_pmd_level(pudp, addr, next, cpa); + } + pudp++; + addr = next; + cond_resched(); + } while (addr < end && !rc); + return rc; +} + +static DEFINE_MUTEX(cpa_mutex); + +static int change_page_attr(unsigned long addr, unsigned long end, + struct cpa cpa) +{ + unsigned long next; + int rc = -EINVAL; + pgd_t *pgdp; + + if (end >= MODULES_END) + return -EINVAL; + mutex_lock(&cpa_mutex); + pgdp = pgd_offset_k(addr); + do { + if (pgd_none(*pgdp)) + break; + next = pgd_addr_end(addr, end); + rc = walk_pud_level(pgdp, addr, next, cpa); + if (rc) + break; + cond_resched(); + } while (pgdp++, addr = next, addr < end && !rc); + mutex_unlock(&cpa_mutex); + return rc; } int set_memory_ro(unsigned long addr, int numpages) { - change_page_attr(addr, numpages, pte_wrprotect); - return 0; + struct cpa cpa = { + .set_ro = 1, + }; + + addr &= PAGE_MASK; + return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa); } int set_memory_rw(unsigned long addr, int numpages) { - change_page_attr(addr, numpages, pte_mkwrite); - return 0; + struct cpa cpa = { + .clear_ro = 1, + }; + + addr &= PAGE_MASK; + return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa); } /* not possible */ @@ -138,7 +337,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) nr = min(numpages - i, nr); if (enable) { for (j = 0; j < nr; j++) { - pte_val(*pte) = __pa(address); + pte_val(*pte) = address | pgprot_val(PAGE_KERNEL); address += PAGE_SIZE; pte++; } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index e8b5962ac12a..e2565d2d0c32 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -169,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) return table; } /* Allocate a fresh page */ - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + page = alloc_page(GFP_KERNEL); if (!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4324b87f9398..b98d1a152d46 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -27,40 +27,37 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - int active, count; pte_t old; old = *ptep; if 
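/*
 * Editor's note (usage, unchanged by this patch): set_memory_ro() and
 * set_memory_rw() above keep their old prototype, so callers still
 * read, e.g.:
 *
 *	set_memory_ro((unsigned long)page_address(page), 1);
 *	...
 *	set_memory_rw((unsigned long)page_address(page), 1);
 *
 * What changed is the implementation: change_page_attr() now walks the
 * kernel page tables once per range under cpa_mutex, splits 2G and 1M
 * mappings on demand, and rejects ranges that reach MODULES_END.
 */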
(unlikely(pte_val(old) & _PAGE_INVALID)) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) __ptep_ipte_local(addr, ptep); else __ptep_ipte(addr, ptep); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } static inline pte_t ptep_flush_lazy(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - int active, count; pte_t old; old = *ptep; if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { + atomic_inc(&mm->context.flush_count); + if (cpumask_equal(&mm->context.cpu_attach_mask, + cpumask_of(smp_processor_id()))) { pte_val(*ptep) |= _PAGE_INVALID; mm->context.flush_mm = 1; } else __ptep_ipte(addr, ptep); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } @@ -70,7 +67,6 @@ static inline pgste_t pgste_get_lock(pte_t *ptep) #ifdef CONFIG_PGSTE unsigned long old; - preempt_disable(); asm( " lg %0,%2\n" "0: lgr %1,%0\n" @@ -93,7 +89,6 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) : "=Q" (ptep[PTRS_PER_PTE]) : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); - preempt_enable(); #endif } @@ -230,9 +225,11 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, pgste_t pgste; pte_t old; + preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_direct(mm, addr, ptep); ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + preempt_enable(); return old; } EXPORT_SYMBOL(ptep_xchg_direct); @@ -243,9 +240,11 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, pgste_t pgste; pte_t old; + preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_lazy(mm, addr, ptep); ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + preempt_enable(); return old; } EXPORT_SYMBOL(ptep_xchg_lazy); @@ -256,6 +255,7 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pgste_t pgste; pte_t old; + preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_lazy(mm, addr, ptep); if (mm_has_pgste(mm)) { @@ -279,13 +279,13 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, } else { *ptep = pte; } + preempt_enable(); } EXPORT_SYMBOL(ptep_modify_prot_commit); static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - int active, count; pmd_t old; old = *pmdp; @@ -295,36 +295,34 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, __pmdp_csp(pmdp); return old; } - active = (mm == current->active_mm) ? 
1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) __pmdp_idte_local(addr, pmdp); else __pmdp_idte(addr, pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - int active, count; pmd_t old; old = *pmdp; if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { + atomic_inc(&mm->context.flush_count); + if (cpumask_equal(&mm->context.cpu_attach_mask, + cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; } else if (MACHINE_HAS_IDTE) __pmdp_idte(addr, pmdp); else __pmdp_csp(pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } @@ -333,8 +331,10 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, { pmd_t old; + preempt_disable(); old = pmdp_flush_direct(mm, addr, pmdp); *pmdp = new; + preempt_enable(); return old; } EXPORT_SYMBOL(pmdp_xchg_direct); @@ -344,12 +344,53 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, { pmd_t old; + preempt_disable(); old = pmdp_flush_lazy(mm, addr, pmdp); *pmdp = new; + preempt_enable(); return old; } EXPORT_SYMBOL(pmdp_xchg_lazy); +static inline pud_t pudp_flush_direct(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + pud_t old; + + old = *pudp; + if (pud_val(old) & _REGION_ENTRY_INVALID) + return old; + if (!MACHINE_HAS_IDTE) { + /* + * Invalid bit position is the same for pmd and pud, so we can + * re-use _pmd_csp() here + */ + __pmdp_csp((pmd_t *) pudp); + return old; + } + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __pudp_idte_local(addr, pudp); + else + __pudp_idte(addr, pudp); + atomic_dec(&mm->context.flush_count); + return old; +} + +pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t new) +{ + pud_t old; + + preempt_disable(); + old = pudp_flush_direct(mm, addr, pudp); + *pudp = new; + preempt_enable(); + return old; +} +EXPORT_SYMBOL(pudp_xchg_direct); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) @@ -398,20 +439,24 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, pgste_t pgste; /* the mm_has_pgste() check is done in set_pte_at() */ + preempt_disable(); pgste = pgste_get_lock(ptep); pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; pgste_set_key(ptep, pgste, entry, mm); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); + preempt_enable(); } void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pgste_t pgste; + preempt_disable(); pgste = pgste_get_lock(ptep); pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); + preempt_enable(); } static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) @@ -434,10 +479,11 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_t pte; /* Zap unused and logically-zero pages */ + preempt_disable(); pgste = pgste_get_lock(ptep); pgstev = pgste_val(pgste); pte = *ptep; - if 
(pte_swap(pte) && + if (!reset && pte_swap(pte) && ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || (pgstev & _PGSTE_GPS_ZERO))) { ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); @@ -446,6 +492,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, if (reset) pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; pgste_set_unlock(ptep, pgste); + preempt_enable(); } void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -454,6 +501,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) pgste_t pgste; /* Clear storage key */ + preempt_disable(); pgste = pgste_get_lock(ptep); pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT); @@ -461,6 +509,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); pgste_set_unlock(ptep, pgste); + preempt_enable(); } /* diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index d48cf25cfe99..1848292766ef 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -29,9 +30,11 @@ static LIST_HEAD(mem_segs); static void __ref *vmem_alloc_pages(unsigned int order) { + unsigned long size = PAGE_SIZE << order; + if (slab_is_available()) return (void *)__get_free_pages(GFP_KERNEL, order); - return alloc_bootmem_pages((1 << order) * PAGE_SIZE); + return alloc_bootmem_align(size, size); } static inline pud_t *vmem_pud_alloc(void) @@ -45,7 +48,7 @@ static inline pud_t *vmem_pud_alloc(void) return pud; } -static inline pmd_t *vmem_pmd_alloc(void) +pmd_t *vmem_pmd_alloc(void) { pmd_t *pmd = NULL; @@ -56,7 +59,7 @@ static inline pmd_t *vmem_pmd_alloc(void) return pmd; } -static pte_t __ref *vmem_pte_alloc(void) +pte_t __ref *vmem_pte_alloc(void) { pte_t *pte; @@ -75,8 +78,9 @@ static pte_t __ref *vmem_pte_alloc(void) /* * Add a physical memory range to the 1:1 mapping. */ -static int vmem_add_mem(unsigned long start, unsigned long size, int ro) +static int vmem_add_mem(unsigned long start, unsigned long size) { + unsigned long pages4k, pages1m, pages2g; unsigned long end = start + size; unsigned long address = start; pgd_t *pg_dir; @@ -85,6 +89,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pte_t *pt_dir; int ret = -ENOMEM; + pages4k = pages1m = pages2g = 0; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -97,10 +102,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && !debug_pagealloc_enabled()) { - pud_val(*pu_dir) = __pa(address) | - _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | - (ro ? _REGION_ENTRY_PROTECT : 0); + pud_val(*pu_dir) = address | pgprot_val(REGION3_KERNEL); address += PUD_SIZE; + pages2g++; continue; } if (pud_none(*pu_dir)) { @@ -113,11 +117,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) && !debug_pagealloc_enabled()) { - pmd_val(*pm_dir) = __pa(address) | - _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | - _SEGMENT_ENTRY_YOUNG | - (ro ? 
_SEGMENT_ENTRY_PROTECT : 0); + pmd_val(*pm_dir) = address | pgprot_val(SEGMENT_KERNEL); address += PMD_SIZE; + pages1m++; continue; } if (pmd_none(*pm_dir)) { @@ -128,12 +130,15 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) } pt_dir = pte_offset_kernel(pm_dir, address); - pte_val(*pt_dir) = __pa(address) | - pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); + pte_val(*pt_dir) = address | pgprot_val(PAGE_KERNEL); address += PAGE_SIZE; + pages4k++; } ret = 0; out: + update_page_count(PG_DIRECT_MAP_4K, pages4k); + update_page_count(PG_DIRECT_MAP_1M, pages1m); + update_page_count(PG_DIRECT_MAP_2G, pages2g); return ret; } @@ -143,15 +148,15 @@ out: */ static void vmem_remove_range(unsigned long start, unsigned long size) { + unsigned long pages4k, pages1m, pages2g; unsigned long end = start + size; unsigned long address = start; pgd_t *pg_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; - pte_t pte; - pte_val(pte) = _PAGE_INVALID; + pages4k = pages1m = pages2g = 0; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -166,6 +171,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) if (pud_large(*pu_dir)) { pud_clear(pu_dir); address += PUD_SIZE; + pages2g++; continue; } pm_dir = pmd_offset(pu_dir, address); @@ -176,13 +182,18 @@ static void vmem_remove_range(unsigned long start, unsigned long size) if (pmd_large(*pm_dir)) { pmd_clear(pm_dir); address += PMD_SIZE; + pages1m++; continue; } pt_dir = pte_offset_kernel(pm_dir, address); - *pt_dir = pte; + pte_clear(&init_mm, address, pt_dir); address += PAGE_SIZE; + pages4k++; } flush_tlb_kernel_range(start, end); + update_page_count(PG_DIRECT_MAP_4K, -pages4k); + update_page_count(PG_DIRECT_MAP_1M, -pages1m); + update_page_count(PG_DIRECT_MAP_2G, -pages2g); } /* @@ -341,7 +352,7 @@ int vmem_add_mapping(unsigned long start, unsigned long size) if (ret) goto out_free; - ret = vmem_add_mem(start, size, 0); + ret = vmem_add_mem(start, size); if (ret) goto out_remove; goto out; @@ -362,31 +373,13 @@ out: */ void __init vmem_map_init(void) { - unsigned long ro_start, ro_end; + unsigned long size = _eshared - _stext; struct memblock_region *reg; - phys_addr_t start, end; - ro_start = PFN_ALIGN((unsigned long)&_stext); - ro_end = (unsigned long)&_eshared & PAGE_MASK; - for_each_memblock(memory, reg) { - start = reg->base; - end = reg->base + reg->size; - if (start >= ro_end || end <= ro_start) - vmem_add_mem(start, end - start, 0); - else if (start >= ro_start && end <= ro_end) - vmem_add_mem(start, end - start, 1); - else if (start >= ro_start) { - vmem_add_mem(start, ro_end - start, 1); - vmem_add_mem(ro_end, end - ro_end, 0); - } else if (end < ro_end) { - vmem_add_mem(start, ro_start - start, 0); - vmem_add_mem(ro_start, end - ro_start, 1); - } else { - vmem_add_mem(start, ro_start - start, 0); - vmem_add_mem(ro_start, ro_end - ro_start, 1); - vmem_add_mem(ro_end, end - ro_end, 0); - } - } + for_each_memblock(memory, reg) + vmem_add_mem(reg->base, reg->size); + set_memory_ro((unsigned long)_stext, size >> PAGE_SHIFT); + pr_info("Write protected kernel read-only data: %luk\n", size >> 10); } /* diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c index 828d0695d0d4..fbc394e16b2c 100644 --- a/arch/s390/numa/mode_emu.c +++ b/arch/s390/numa/mode_emu.c @@ -34,7 +34,8 @@ #define DIST_CORE 1 #define DIST_MC 2 #define DIST_BOOK 3 -#define DIST_MAX 4 +#define DIST_DRAWER 4 +#define DIST_MAX 5 /* Node distance reported to common code */ #define EMU_NODE_DIST 10 @@ 
-43,7 +44,7 @@ #define NODE_ID_FREE -1 /* Different levels of toptree */ -enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY}; +enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY}; /* The two toptree IDs */ enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA}; @@ -113,6 +114,14 @@ static int cores_free(struct toptree *tree) * Return node of core */ static struct toptree *core_node(struct toptree *core) +{ + return core->parent->parent->parent->parent; +} + +/* + * Return drawer of core + */ +static struct toptree *core_drawer(struct toptree *core) { return core->parent->parent->parent; } @@ -138,6 +147,8 @@ static struct toptree *core_mc(struct toptree *core) */ static int dist_core_to_core(struct toptree *core1, struct toptree *core2) { + if (core_drawer(core1)->id != core_drawer(core2)->id) + return DIST_DRAWER; if (core_book(core1)->id != core_book(core2)->id) return DIST_BOOK; if (core_mc(core1)->id != core_mc(core2)->id) @@ -262,6 +273,8 @@ static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys) struct toptree *core; /* Always try to move perfectly fitting structures first */ + move_level_to_numa(numa, phys, DRAWER, true); + move_level_to_numa(numa, phys, DRAWER, false); move_level_to_numa(numa, phys, BOOK, true); move_level_to_numa(numa, phys, BOOK, false); move_level_to_numa(numa, phys, MC, true); @@ -335,7 +348,7 @@ static struct toptree *toptree_to_numa(struct toptree *phys) */ static struct toptree *toptree_from_topology(void) { - struct toptree *phys, *node, *book, *mc, *core; + struct toptree *phys, *node, *drawer, *book, *mc, *core; struct cpu_topology_s390 *top; int cpu; @@ -344,10 +357,11 @@ static struct toptree *toptree_from_topology(void) for_each_online_cpu(cpu) { top = &per_cpu(cpu_topology, cpu); node = toptree_get_child(phys, 0); - book = toptree_get_child(node, top->book_id); + drawer = toptree_get_child(node, top->drawer_id); + book = toptree_get_child(drawer, top->book_id); mc = toptree_get_child(book, top->socket_id); core = toptree_get_child(mc, top->core_id); - if (!book || !mc || !core) + if (!drawer || !book || !mc || !core) panic("NUMA emulation could not allocate memory"); cpumask_set_cpu(cpu, &core->mask); toptree_update_mask(mc); @@ -368,6 +382,7 @@ static void topology_add_core(struct toptree *core) cpumask_copy(&top->thread_mask, &core->mask); cpumask_copy(&top->core_mask, &core_mc(core)->mask); cpumask_copy(&top->book_mask, &core_book(core)->mask); + cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask); cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]); top->node_id = core_node(core)->id; } diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 496e4a7ee00e..e9dd41b0b8d3 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -7,4 +7,3 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ timer_int.o ) oprofile-y := $(DRIVER_OBJS) init.o -oprofile-y += hwsampler.o diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c deleted file mode 100644 index ff9b4eb34589..000000000000 --- a/arch/s390/oprofile/hwsampler.c +++ /dev/null @@ -1,1178 +0,0 @@ -/* - * Copyright IBM Corp. 
2010 - * Author: Heinz Graalfs - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "hwsampler.h" -#include "op_counter.h" - -#define MAX_NUM_SDB 511 -#define MIN_NUM_SDB 1 - -DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); - -struct hws_execute_parms { - void *buffer; - signed int rc; -}; - -DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); -EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer); - -static DEFINE_MUTEX(hws_sem); -static DEFINE_MUTEX(hws_sem_oom); - -static unsigned char hws_flush_all; -static unsigned int hws_oom; -static unsigned int hws_alert; -static struct workqueue_struct *hws_wq; - -static unsigned int hws_state; -enum { - HWS_INIT = 1, - HWS_DEALLOCATED, - HWS_STOPPED, - HWS_STARTED, - HWS_STOPPING }; - -/* set to 1 if called by kernel during memory allocation */ -static unsigned char oom_killer_was_active; -/* size of SDBT and SDB as of allocate API */ -static unsigned long num_sdbt = 100; -static unsigned long num_sdb = 511; -/* sampling interval (machine cycles) */ -static unsigned long interval; - -static unsigned long min_sampler_rate; -static unsigned long max_sampler_rate; - -static void execute_qsi(void *parms) -{ - struct hws_execute_parms *ep = parms; - - ep->rc = qsi(ep->buffer); -} - -static void execute_ssctl(void *parms) -{ - struct hws_execute_parms *ep = parms; - - ep->rc = lsctl(ep->buffer); -} - -static int smp_ctl_ssctl_stop(int cpu) -{ - int rc; - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - cb->ssctl.es = 0; - cb->ssctl.cs = 0; - - ep.buffer = &cb->ssctl; - smp_call_function_single(cpu, execute_ssctl, &ep, 1); - rc = ep.rc; - if (rc) { - printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); - dump_stack(); - } - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - - if (cb->qsi.es || cb->qsi.cs) { - printk(KERN_EMERG "CPUMF sampling did not stop properly.\n"); - dump_stack(); - } - - return rc; -} - -static int smp_ctl_ssctl_deactivate(int cpu) -{ - int rc; - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - cb->ssctl.es = 1; - cb->ssctl.cs = 0; - - ep.buffer = &cb->ssctl; - smp_call_function_single(cpu, execute_ssctl, &ep, 1); - rc = ep.rc; - if (rc) - printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - - if (cb->qsi.cs) - printk(KERN_EMERG "CPUMF sampling was not set inactive.\n"); - - return rc; -} - -static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval) -{ - int rc; - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - cb->ssctl.h = 1; - cb->ssctl.tear = cb->first_sdbt; - cb->ssctl.dear = *(unsigned long *) cb->first_sdbt; - cb->ssctl.interval = interval; - cb->ssctl.es = 1; - cb->ssctl.cs = 1; - - ep.buffer = &cb->ssctl; - smp_call_function_single(cpu, execute_ssctl, &ep, 1); - rc = ep.rc; - if (rc) - printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - if (ep.rc) - printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu); - - return rc; -} - -static int smp_ctl_qsi(int cpu) -{ - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - ep.buffer = 
&cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - - return ep.rc; -} - -static void hws_ext_handler(struct ext_code ext_code, - unsigned int param32, unsigned long param64) -{ - struct hws_cpu_buffer *cb = this_cpu_ptr(&sampler_cpu_buffer); - - if (!(param32 & CPU_MF_INT_SF_MASK)) - return; - - if (!hws_alert) - return; - - inc_irq_stat(IRQEXT_CMS); - atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); - - if (hws_wq) - queue_work(hws_wq, &cb->worker); -} - -static void worker(struct work_struct *work); - -static void add_samples_to_oprofile(unsigned cpu, unsigned long *, - unsigned long *dear); - -static void init_all_cpu_buffers(void) -{ - int cpu; - struct hws_cpu_buffer *cb; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - memset(cb, 0, sizeof(struct hws_cpu_buffer)); - } -} - -static void prepare_cpu_buffers(void) -{ - struct hws_cpu_buffer *cb; - int cpu; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - atomic_set(&cb->ext_params, 0); - cb->worker_entry = 0; - cb->sample_overflow = 0; - cb->req_alert = 0; - cb->incorrect_sdbt_entry = 0; - cb->invalid_entry_address = 0; - cb->loss_of_sample_data = 0; - cb->sample_auth_change_alert = 0; - cb->finish = 0; - cb->oom = 0; - cb->stop_mode = 0; - } -} - -/* - * allocate_sdbt() - allocate sampler memory - * @cpu: the cpu for which sampler memory is allocated - * - * A 4K page is allocated for each requested SDBT. - * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs. - * Set ALERT_REQ mask in each SDBs trailer. - * Returns zero if successful, <0 otherwise. - */ -static int allocate_sdbt(int cpu) -{ - int j, k, rc; - unsigned long *sdbt; - unsigned long sdb; - unsigned long *tail; - unsigned long *trailer; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (cb->first_sdbt) - return -EINVAL; - - sdbt = NULL; - tail = sdbt; - - for (j = 0; j < num_sdbt; j++) { - sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); - - mutex_lock(&hws_sem_oom); - /* OOM killer might have been activated */ - barrier(); - if (oom_killer_was_active || !sdbt) { - if (sdbt) - free_page((unsigned long)sdbt); - - goto allocate_sdbt_error; - } - if (cb->first_sdbt == 0) - cb->first_sdbt = (unsigned long)sdbt; - - /* link current page to tail of chain */ - if (tail) - *tail = (unsigned long)(void *)sdbt + 1; - - mutex_unlock(&hws_sem_oom); - - for (k = 0; k < num_sdb; k++) { - /* get and set SDB page */ - sdb = get_zeroed_page(GFP_KERNEL); - - mutex_lock(&hws_sem_oom); - /* OOM killer might have been activated */ - barrier(); - if (oom_killer_was_active || !sdb) { - if (sdb) - free_page(sdb); - - goto allocate_sdbt_error; - } - *sdbt = sdb; - trailer = trailer_entry_ptr(*sdbt); - *trailer = SDB_TE_ALERT_REQ_MASK; - sdbt++; - mutex_unlock(&hws_sem_oom); - } - tail = sdbt; - } - mutex_lock(&hws_sem_oom); - if (oom_killer_was_active) - goto allocate_sdbt_error; - - rc = 0; - if (tail) - *tail = (unsigned long) - ((void *)cb->first_sdbt) + 1; - -allocate_sdbt_exit: - mutex_unlock(&hws_sem_oom); - return rc; - -allocate_sdbt_error: - rc = -ENOMEM; - goto allocate_sdbt_exit; -} - -/* - * deallocate_sdbt() - deallocate all sampler memory - * - * For each online CPU all SDBT trees are deallocated. - * Returns the number of freed pages. 
- */ -static int deallocate_sdbt(void) -{ - int cpu; - int counter; - - counter = 0; - - for_each_online_cpu(cpu) { - unsigned long start; - unsigned long sdbt; - unsigned long *curr; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (!cb->first_sdbt) - continue; - - sdbt = cb->first_sdbt; - curr = (unsigned long *) sdbt; - start = sdbt; - - /* we'll free the SDBT after all SDBs are processed... */ - while (1) { - if (!*curr || !sdbt) - break; - - /* watch for link entry reset if found */ - if (is_link_entry(curr)) { - curr = get_next_sdbt(curr); - if (sdbt) - free_page(sdbt); - - /* we are done if we reach the start */ - if ((unsigned long) curr == start) - break; - else - sdbt = (unsigned long) curr; - } else { - /* process SDB pointer */ - if (*curr) { - free_page(*curr); - curr++; - } - } - counter++; - } - cb->first_sdbt = 0; - } - return counter; -} - -static int start_sampling(int cpu) -{ - int rc; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - rc = smp_ctl_ssctl_enable_activate(cpu, interval); - if (rc) { - printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu); - goto start_exit; - } - - rc = -EINVAL; - if (!cb->qsi.es) { - printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu); - goto start_exit; - } - - if (!cb->qsi.cs) { - printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu); - goto start_exit; - } - - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n", - cpu, interval); - - rc = 0; - -start_exit: - return rc; -} - -static int stop_sampling(int cpu) -{ - unsigned long v; - int rc; - struct hws_cpu_buffer *cb; - - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - - cb = &per_cpu(sampler_cpu_buffer, cpu); - if (!rc && !cb->qsi.es) - printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu); - - rc = smp_ctl_ssctl_stop(cpu); - if (rc) { - printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n", - cpu, rc); - goto stop_exit; - } - - printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu); - -stop_exit: - v = cb->req_alert; - if (v) - printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert," - " count=%lu.\n", cpu, v); - - v = cb->loss_of_sample_data; - if (v) - printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data," - " count=%lu.\n", cpu, v); - - v = cb->invalid_entry_address; - if (v) - printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address," - " count=%lu.\n", cpu, v); - - v = cb->incorrect_sdbt_entry; - if (v) - printk(KERN_ERR - "hwsampler: CPU %d CPUMF Incorrect SDBT address," - " count=%lu.\n", cpu, v); - - v = cb->sample_auth_change_alert; - if (v) - printk(KERN_ERR - "hwsampler: CPU %d CPUMF Sample authorization change," - " count=%lu.\n", cpu, v); - - return rc; -} - -static int check_hardware_prerequisites(void) -{ - if (!test_facility(68)) - return -EOPNOTSUPP; - return 0; -} -/* - * hws_oom_callback() - the OOM callback function - * - * In case the callback is invoked during memory allocation for the - * hw sampler, all obtained memory is deallocated and a flag is set - * so main sampler memory allocation can exit with a failure code. - * In case the callback is invoked during sampling the hw sampler - * is deactivated for all CPUs. 
- */ -static int hws_oom_callback(struct notifier_block *nfb, - unsigned long dummy, void *parm) -{ - unsigned long *freed; - int cpu; - struct hws_cpu_buffer *cb; - - freed = parm; - - mutex_lock(&hws_sem_oom); - - if (hws_state == HWS_DEALLOCATED) { - /* during memory allocation */ - if (oom_killer_was_active == 0) { - oom_killer_was_active = 1; - *freed += deallocate_sdbt(); - } - } else { - int i; - cpu = get_cpu(); - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (!cb->oom) { - for_each_online_cpu(i) { - smp_ctl_ssctl_deactivate(i); - cb->oom = 1; - } - cb->finish = 1; - - printk(KERN_INFO - "hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n", - cpu); - } - } - - mutex_unlock(&hws_sem_oom); - - return NOTIFY_OK; -} - -static struct notifier_block hws_oom_notifier = { - .notifier_call = hws_oom_callback -}; - -static int hws_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - /* We do not have sampler space available for all possible CPUs. - All CPUs should be online when hw sampling is activated. */ - return (hws_state <= HWS_DEALLOCATED) ? NOTIFY_OK : NOTIFY_BAD; -} - -static struct notifier_block hws_cpu_notifier = { - .notifier_call = hws_cpu_callback -}; - -/** - * hwsampler_deactivate() - set hardware sampling temporarily inactive - * @cpu: specifies the CPU to be set inactive. - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_deactivate(unsigned int cpu) -{ - /* - * Deactivate hw sampling temporarily and flush the buffer - * by pushing all the pending samples to oprofile buffer. - * - * This function can be called under one of the following conditions: - * Memory unmap, task is exiting. - */ - int rc; - struct hws_cpu_buffer *cb; - - rc = 0; - mutex_lock(&hws_sem); - - cb = &per_cpu(sampler_cpu_buffer, cpu); - if (hws_state == HWS_STARTED) { - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (cb->qsi.cs) { - rc = smp_ctl_ssctl_deactivate(cpu); - if (rc) { - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu); - cb->finish = 1; - hws_state = HWS_STOPPING; - } else { - hws_flush_all = 1; - /* Add work to queue to read pending samples.*/ - queue_work_on(cpu, hws_wq, &cb->worker); - } - } - } - mutex_unlock(&hws_sem); - - if (hws_wq) - flush_workqueue(hws_wq); - - return rc; -} - -/** - * hwsampler_activate() - activate/resume hardware sampling which was deactivated - * @cpu: specifies the CPU to be set active. - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_activate(unsigned int cpu) -{ - /* - * Re-activate hw sampling. This should be called in pair with - * hwsampler_deactivate(). 
- */ - int rc; - struct hws_cpu_buffer *cb; - - rc = 0; - mutex_lock(&hws_sem); - - cb = &per_cpu(sampler_cpu_buffer, cpu); - if (hws_state == HWS_STARTED) { - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (!cb->qsi.cs) { - hws_flush_all = 0; - rc = smp_ctl_ssctl_enable_activate(cpu, interval); - if (rc) { - printk(KERN_ERR - "CPU %d, CPUMF activate sampling failed.\n", - cpu); - } - } - } - - mutex_unlock(&hws_sem); - - return rc; -} - -static int check_qsi_on_setup(void) -{ - int rc; - unsigned int cpu; - struct hws_cpu_buffer *cb; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (rc) - return -EOPNOTSUPP; - - if (!cb->qsi.as) { - printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n"); - return -EINVAL; - } - - if (cb->qsi.es) { - printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n"); - rc = smp_ctl_ssctl_stop(cpu); - if (rc) - return -EINVAL; - - printk(KERN_INFO - "CPU %d, CPUMF Sampling stopped now.\n", cpu); - } - } - return 0; -} - -static int check_qsi_on_start(void) -{ - unsigned int cpu; - int rc; - struct hws_cpu_buffer *cb; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - - if (!cb->qsi.as) - return -EINVAL; - - if (cb->qsi.es) - return -EINVAL; - - if (cb->qsi.cs) - return -EINVAL; - } - return 0; -} - -static void worker_on_start(unsigned int cpu) -{ - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - cb->worker_entry = cb->first_sdbt; -} - -static int worker_check_error(unsigned int cpu, int ext_params) -{ - int rc; - unsigned long *sdbt; - struct hws_cpu_buffer *cb; - - rc = 0; - cb = &per_cpu(sampler_cpu_buffer, cpu); - sdbt = (unsigned long *) cb->worker_entry; - - if (!sdbt || !*sdbt) - return -EINVAL; - - if (ext_params & CPU_MF_INT_SF_PRA) - cb->req_alert++; - - if (ext_params & CPU_MF_INT_SF_LSDA) - cb->loss_of_sample_data++; - - if (ext_params & CPU_MF_INT_SF_IAE) { - cb->invalid_entry_address++; - rc = -EINVAL; - } - - if (ext_params & CPU_MF_INT_SF_ISE) { - cb->incorrect_sdbt_entry++; - rc = -EINVAL; - } - - if (ext_params & CPU_MF_INT_SF_SACA) { - cb->sample_auth_change_alert++; - rc = -EINVAL; - } - - return rc; -} - -static void worker_on_finish(unsigned int cpu) -{ - int rc, i; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (cb->finish) { - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (cb->qsi.es) { - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n", - cpu); - rc = smp_ctl_ssctl_stop(cpu); - if (rc) - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Deactivation failed.\n", - cpu); - - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (!cb->finish) { - cb->finish = 1; - queue_work_on(i, hws_wq, - &cb->worker); - } - } - } - } -} - -static void worker_on_interrupt(unsigned int cpu) -{ - unsigned long *sdbt; - unsigned char done; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - sdbt = (unsigned long *) cb->worker_entry; - - done = 0; - /* do not proceed if stop was entered, - * forget the buffers not yet processed */ - while (!done && !cb->stop_mode) { - unsigned long *trailer; - struct hws_trailer_entry *te; - unsigned long *dear = 0; - - trailer = trailer_entry_ptr(*sdbt); - /* leave loop if no more work to do */ - if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) { - done = 1; - if (!hws_flush_all) - continue; - } - - te = (struct hws_trailer_entry *)trailer; - cb->sample_overflow += te->overflow; - - 
add_samples_to_oprofile(cpu, sdbt, dear); - - /* reset trailer */ - xchg((unsigned char *) te, 0x40); - - /* advance to next sdb slot in current sdbt */ - sdbt++; - /* in case link bit is set use address w/o link bit */ - if (is_link_entry(sdbt)) - sdbt = get_next_sdbt(sdbt); - - cb->worker_entry = (unsigned long)sdbt; - } -} - -static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, - unsigned long *dear) -{ - struct hws_basic_entry *sample_data_ptr; - unsigned long *trailer; - - trailer = trailer_entry_ptr(*sdbt); - if (dear) { - if (dear > trailer) - return; - trailer = dear; - } - - sample_data_ptr = (struct hws_basic_entry *)(*sdbt); - - while ((unsigned long *)sample_data_ptr < trailer) { - struct pt_regs *regs = NULL; - struct task_struct *tsk = NULL; - - /* - * Check sampling mode, 1 indicates basic (=customer) sampling - * mode. - */ - if (sample_data_ptr->def != 1) { - /* sample slot is not yet written */ - break; - } else { - /* make sure we don't use it twice, - * the next time the sampler will set it again */ - sample_data_ptr->def = 0; - } - - /* Get pt_regs. */ - if (sample_data_ptr->P == 1) { - /* userspace sample */ - unsigned int pid = sample_data_ptr->prim_asn; - if (!counter_config.user) - goto skip_sample; - rcu_read_lock(); - tsk = pid_task(find_vpid(pid), PIDTYPE_PID); - if (tsk) - regs = task_pt_regs(tsk); - rcu_read_unlock(); - } else { - /* kernelspace sample */ - if (!counter_config.kernel) - goto skip_sample; - regs = task_pt_regs(current); - } - - mutex_lock(&hws_sem); - oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, - !sample_data_ptr->P, tsk); - mutex_unlock(&hws_sem); - skip_sample: - sample_data_ptr++; - } -} - -static void worker(struct work_struct *work) -{ - unsigned int cpu; - int ext_params; - struct hws_cpu_buffer *cb; - - cb = container_of(work, struct hws_cpu_buffer, worker); - cpu = smp_processor_id(); - ext_params = atomic_xchg(&cb->ext_params, 0); - - if (!cb->worker_entry) - worker_on_start(cpu); - - if (worker_check_error(cpu, ext_params)) - return; - - if (!cb->finish) - worker_on_interrupt(cpu); - - if (cb->finish) - worker_on_finish(cpu); -} - -/** - * hwsampler_allocate() - allocate memory for the hardware sampler - * @sdbt: number of SDBTs per online CPU (must be > 0) - * @sdb: number of SDBs per SDBT (minimum 1, maximum 511) - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_allocate(unsigned long sdbt, unsigned long sdb) -{ - int cpu, rc; - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state != HWS_DEALLOCATED) - goto allocate_exit; - - if (sdbt < 1) - goto allocate_exit; - - if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB) - goto allocate_exit; - - num_sdbt = sdbt; - num_sdb = sdb; - - oom_killer_was_active = 0; - register_oom_notifier(&hws_oom_notifier); - - for_each_online_cpu(cpu) { - if (allocate_sdbt(cpu)) { - unregister_oom_notifier(&hws_oom_notifier); - goto allocate_error; - } - } - unregister_oom_notifier(&hws_oom_notifier); - if (oom_killer_was_active) - goto allocate_error; - - hws_state = HWS_STOPPED; - rc = 0; - -allocate_exit: - mutex_unlock(&hws_sem); - return rc; - -allocate_error: - rc = -ENOMEM; - printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n"); - goto allocate_exit; -} - -/** - * hwsampler_deallocate() - deallocate hardware sampler memory - * - * Returns 0 on success, !0 on failure. 
- */ -int hwsampler_deallocate(void) -{ - int rc; - - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state != HWS_STOPPED) - goto deallocate_exit; - - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); - hws_alert = 0; - deallocate_sdbt(); - - hws_state = HWS_DEALLOCATED; - rc = 0; - -deallocate_exit: - mutex_unlock(&hws_sem); - - return rc; -} - -unsigned long hwsampler_query_min_interval(void) -{ - return min_sampler_rate; -} - -unsigned long hwsampler_query_max_interval(void) -{ - return max_sampler_rate; -} - -unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) -{ - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - return cb->sample_overflow; -} - -int hwsampler_setup(void) -{ - int rc; - int cpu; - struct hws_cpu_buffer *cb; - - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state) - goto setup_exit; - - hws_state = HWS_INIT; - - init_all_cpu_buffers(); - - rc = check_hardware_prerequisites(); - if (rc) - goto setup_exit; - - rc = check_qsi_on_setup(); - if (rc) - goto setup_exit; - - rc = -EINVAL; - hws_wq = create_workqueue("hwsampler"); - if (!hws_wq) - goto setup_exit; - - register_cpu_notifier(&hws_cpu_notifier); - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - INIT_WORK(&cb->worker, worker); - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (min_sampler_rate != cb->qsi.min_sampl_rate) { - if (min_sampler_rate) { - printk(KERN_WARNING - "hwsampler: different min sampler rate values.\n"); - if (min_sampler_rate < cb->qsi.min_sampl_rate) - min_sampler_rate = - cb->qsi.min_sampl_rate; - } else - min_sampler_rate = cb->qsi.min_sampl_rate; - } - if (max_sampler_rate != cb->qsi.max_sampl_rate) { - if (max_sampler_rate) { - printk(KERN_WARNING - "hwsampler: different max sampler rate values.\n"); - if (max_sampler_rate > cb->qsi.max_sampl_rate) - max_sampler_rate = - cb->qsi.max_sampl_rate; - } else - max_sampler_rate = cb->qsi.max_sampl_rate; - } - } - register_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler); - - hws_state = HWS_DEALLOCATED; - rc = 0; - -setup_exit: - mutex_unlock(&hws_sem); - return rc; -} - -int hwsampler_shutdown(void) -{ - int rc; - - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) { - mutex_unlock(&hws_sem); - - if (hws_wq) - flush_workqueue(hws_wq); - - mutex_lock(&hws_sem); - - if (hws_state == HWS_STOPPED) { - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); - hws_alert = 0; - deallocate_sdbt(); - } - if (hws_wq) { - destroy_workqueue(hws_wq); - hws_wq = NULL; - } - - unregister_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler); - hws_state = HWS_INIT; - rc = 0; - } - mutex_unlock(&hws_sem); - - unregister_cpu_notifier(&hws_cpu_notifier); - - return rc; -} - -/** - * hwsampler_start_all() - start hardware sampling on all online CPUs - * @rate: specifies the used interval when samples are taken - * - * Returns 0 on success, !0 on failure. 
- */ -int hwsampler_start_all(unsigned long rate) -{ - int rc, cpu; - - mutex_lock(&hws_sem); - - hws_oom = 0; - - rc = -EINVAL; - if (hws_state != HWS_STOPPED) - goto start_all_exit; - - interval = rate; - - /* fail if rate is not valid */ - if (interval < min_sampler_rate || interval > max_sampler_rate) - goto start_all_exit; - - rc = check_qsi_on_start(); - if (rc) - goto start_all_exit; - - prepare_cpu_buffers(); - - for_each_online_cpu(cpu) { - rc = start_sampling(cpu); - if (rc) - break; - } - if (rc) { - for_each_online_cpu(cpu) { - stop_sampling(cpu); - } - goto start_all_exit; - } - hws_state = HWS_STARTED; - rc = 0; - -start_all_exit: - mutex_unlock(&hws_sem); - - if (rc) - return rc; - - register_oom_notifier(&hws_oom_notifier); - hws_oom = 1; - hws_flush_all = 0; - /* now let them in, 1407 CPUMF external interrupts */ - hws_alert = 1; - irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); - - return 0; -} - -/** - * hwsampler_stop_all() - stop hardware sampling on all online CPUs - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_stop_all(void) -{ - int tmp_rc, rc, cpu; - struct hws_cpu_buffer *cb; - - mutex_lock(&hws_sem); - - rc = 0; - if (hws_state == HWS_INIT) { - mutex_unlock(&hws_sem); - return 0; - } - hws_state = HWS_STOPPING; - mutex_unlock(&hws_sem); - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - cb->stop_mode = 1; - tmp_rc = stop_sampling(cpu); - if (tmp_rc) - rc = tmp_rc; - } - - if (hws_wq) - flush_workqueue(hws_wq); - - mutex_lock(&hws_sem); - if (hws_oom) { - unregister_oom_notifier(&hws_oom_notifier); - hws_oom = 0; - } - hws_state = HWS_STOPPED; - mutex_unlock(&hws_sem); - - return rc; -} diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h deleted file mode 100644 index a483d06f2fa7..000000000000 --- a/arch/s390/oprofile/hwsampler.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * CPUMF HW sampler functions and internal structures - * - * Copyright IBM Corp. 2010 - * Author(s): Heinz Graalfs - */ - -#ifndef HWSAMPLER_H_ -#define HWSAMPLER_H_ - -#include -#include - -struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */ -{ /* bytes 0 - 7 Bit(s) */ - unsigned int s:1; /* 0: maximum buffer indicator */ - unsigned int h:1; /* 1: part. level reserved for VM use*/ - unsigned long b2_53:52; /* 2-53: zeros */ - unsigned int es:1; /* 54: sampling enable control */ - unsigned int b55_61:7; /* 55-61: - zeros */ - unsigned int cs:1; /* 62: sampling activation control */ - unsigned int b63:1; /* 63: zero */ - unsigned long interval; /* 8-15: sampling interval */ - unsigned long tear; /* 16-23: TEAR contents */ - unsigned long dear; /* 24-31: DEAR contents */ - /* 32-63: */ - unsigned long rsvrd1; /* reserved */ - unsigned long rsvrd2; /* reserved */ - unsigned long rsvrd3; /* reserved */ - unsigned long rsvrd4; /* reserved */ -}; - -struct hws_cpu_buffer { - unsigned long first_sdbt; /* @ of 1st SDB-Table for this CP*/ - unsigned long worker_entry; - unsigned long sample_overflow; /* taken from SDB ... 
*/ - struct hws_qsi_info_block qsi; - struct hws_ssctl_request_block ssctl; - struct work_struct worker; - atomic_t ext_params; - unsigned long req_alert; - unsigned long loss_of_sample_data; - unsigned long invalid_entry_address; - unsigned long incorrect_sdbt_entry; - unsigned long sample_auth_change_alert; - unsigned int finish:1; - unsigned int oom:1; - unsigned int stop_mode:1; -}; - -int hwsampler_setup(void); -int hwsampler_shutdown(void); -int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); -int hwsampler_deallocate(void); -unsigned long hwsampler_query_min_interval(void); -unsigned long hwsampler_query_max_interval(void); -int hwsampler_start_all(unsigned long interval); -int hwsampler_stop_all(void); -int hwsampler_deactivate(unsigned int cpu); -int hwsampler_activate(unsigned int cpu); -unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu); - -#endif /*HWSAMPLER_H_*/ diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 791935a65800..16f4c3960b87 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -10,488 +10,8 @@ */ #include -#include #include -#include -#include -#include #include -#include - -#include "../../../drivers/oprofile/oprof.h" - -#include "hwsampler.h" -#include "op_counter.h" - -#define DEFAULT_INTERVAL 4127518 - -#define DEFAULT_SDBT_BLOCKS 1 -#define DEFAULT_SDB_BLOCKS 511 - -static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; -static unsigned long oprofile_min_interval; -static unsigned long oprofile_max_interval; - -static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; -static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; - -static int hwsampler_enabled; -static int hwsampler_running; /* start_mutex must be held to change */ -static int hwsampler_available; - -static struct oprofile_operations timer_ops; - -struct op_counter_config counter_config; - -enum __force_cpu_type { - reserved = 0, /* do not force */ - timer, -}; -static int force_cpu_type; - -static int set_cpu_type(const char *str, struct kernel_param *kp) -{ - if (!strcmp(str, "timer")) { - force_cpu_type = timer; - printk(KERN_INFO "oprofile: forcing timer to be returned " - "as cpu type\n"); - } else { - force_cpu_type = 0; - } - - return 0; -} -module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); -MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" - "(report cpu_type \"timer\""); - -static int __oprofile_hwsampler_start(void) -{ - int retval; - - retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); - if (retval) - return retval; - - retval = hwsampler_start_all(oprofile_hw_interval); - if (retval) - hwsampler_deallocate(); - - return retval; -} - -static int oprofile_hwsampler_start(void) -{ - int retval; - - hwsampler_running = hwsampler_enabled; - - if (!hwsampler_running) - return timer_ops.start(); - - retval = perf_reserve_sampling(); - if (retval) - return retval; - - retval = __oprofile_hwsampler_start(); - if (retval) - perf_release_sampling(); - - return retval; -} - -static void oprofile_hwsampler_stop(void) -{ - if (!hwsampler_running) { - timer_ops.stop(); - return; - } - - hwsampler_stop_all(); - hwsampler_deallocate(); - perf_release_sampling(); - return; -} - -/* - * File ops used for: - * /dev/oprofile/0/enabled - * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) - */ - -static ssize_t hwsampler_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); -} 
- -static ssize_t hwsampler_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - if (oprofile_started) - /* - * save to do without locking as we set - * hwsampler_running in start() when start_mutex is - * held - */ - return -EBUSY; - - hwsampler_enabled = val; - - return count; -} - -static const struct file_operations hwsampler_fops = { - .read = hwsampler_read, - .write = hwsampler_write, -}; - -/* - * File ops used for: - * /dev/oprofile/0/count - * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) - * - * Make sure that the value is within the hardware range. - */ - -static ssize_t hw_interval_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, - count, offset); -} - -static ssize_t hw_interval_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - if (val < oprofile_min_interval) - oprofile_hw_interval = oprofile_min_interval; - else if (val > oprofile_max_interval) - oprofile_hw_interval = oprofile_max_interval; - else - oprofile_hw_interval = val; - - return count; -} - -static const struct file_operations hw_interval_fops = { - .read = hw_interval_read, - .write = hw_interval_write, -}; - -/* - * File ops used for: - * /dev/oprofile/0/event - * Only a single event with number 0 is supported with this counter. - * - * /dev/oprofile/0/unit_mask - * This is a dummy file needed by the user space tools. - * No value other than 0 is accepted or returned. - */ - -static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(0, buf, count, offset); -} - -static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - if (val != 0) - return -EINVAL; - return count; -} - -static const struct file_operations zero_fops = { - .read = hwsampler_zero_read, - .write = hwsampler_zero_write, -}; - -/* /dev/oprofile/0/kernel file ops. */ - -static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(counter_config.kernel, - buf, count, offset); -} - -static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - counter_config.kernel = val; - - return count; -} - -static const struct file_operations kernel_fops = { - .read = hwsampler_kernel_read, - .write = hwsampler_kernel_write, -}; - -/* /dev/oprofile/0/user file ops. 
*/ - -static ssize_t hwsampler_user_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(counter_config.user, - buf, count, offset); -} - -static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - counter_config.user = val; - - return count; -} - -static const struct file_operations user_fops = { - .read = hwsampler_user_read, - .write = hwsampler_user_write, -}; - - -/* - * File ops used for: /dev/oprofile/timer/enabled - * The value always has to be the inverted value of hwsampler_enabled. So - * no separate variable is created. That way we do not need locking. - */ - -static ssize_t timer_enabled_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); -} - -static ssize_t timer_enabled_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - /* Timer cannot be disabled without having hardware sampling. */ - if (val == 0 && !hwsampler_available) - return -EINVAL; - - if (oprofile_started) - /* - * save to do without locking as we set - * hwsampler_running in start() when start_mutex is - * held - */ - return -EBUSY; - - hwsampler_enabled = !val; - - return count; -} - -static const struct file_operations timer_enabled_fops = { - .read = timer_enabled_read, - .write = timer_enabled_write, -}; - - -static int oprofile_create_hwsampling_files(struct dentry *root) -{ - struct dentry *dir; - - dir = oprofilefs_mkdir(root, "timer"); - if (!dir) - return -EINVAL; - - oprofilefs_create_file(dir, "enabled", &timer_enabled_fops); - - if (!hwsampler_available) - return 0; - - /* reinitialize default values */ - hwsampler_enabled = 1; - counter_config.kernel = 1; - counter_config.user = 1; - - if (!force_cpu_type) { - /* - * Create the counter file system. A single virtual - * counter is created which can be used to - * enable/disable hardware sampling dynamically from - * user space. The user space will configure a single - * counter with a single event. The value of 'event' - * and 'unit_mask' are not evaluated by the kernel code - * and can only be set to 0. - */ - - dir = oprofilefs_mkdir(root, "0"); - if (!dir) - return -EINVAL; - - oprofilefs_create_file(dir, "enabled", &hwsampler_fops); - oprofilefs_create_file(dir, "event", &zero_fops); - oprofilefs_create_file(dir, "count", &hw_interval_fops); - oprofilefs_create_file(dir, "unit_mask", &zero_fops); - oprofilefs_create_file(dir, "kernel", &kernel_fops); - oprofilefs_create_file(dir, "user", &user_fops); - oprofilefs_create_ulong(dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); - - } else { - /* - * Hardware sampling can be used but the cpu_type is - * forced to timer in order to deal with legacy user - * space tools. The /dev/oprofile/hwsampling fs is - * provided in that case. 
- */ - dir = oprofilefs_mkdir(root, "hwsampling"); - if (!dir) - return -EINVAL; - - oprofilefs_create_file(dir, "hwsampler", - &hwsampler_fops); - oprofilefs_create_file(dir, "hw_interval", - &hw_interval_fops); - oprofilefs_create_ro_ulong(dir, "hw_min_interval", - &oprofile_min_interval); - oprofilefs_create_ro_ulong(dir, "hw_max_interval", - &oprofile_max_interval); - oprofilefs_create_ulong(dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); - } - return 0; -} - -static int oprofile_hwsampler_init(struct oprofile_operations *ops) -{ - /* - * Initialize the timer mode infrastructure as well in order - * to be able to switch back dynamically. oprofile_timer_init - * is not supposed to fail. - */ - if (oprofile_timer_init(ops)) - BUG(); - - memcpy(&timer_ops, ops, sizeof(timer_ops)); - ops->create_files = oprofile_create_hwsampling_files; - - /* - * If the user space tools do not support newer cpu types, - * the force_cpu_type module parameter - * can be used to always return \"timer\" as cpu type. - */ - if (force_cpu_type != timer) { - struct cpuid id; - - get_cpu_id (&id); - - switch (id.machine) { - case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; - case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; - case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; - case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break; - default: return -ENODEV; - } - } - - if (hwsampler_setup()) - return -ENODEV; - - /* - * Query the range for the sampling interval from the - * hardware. - */ - oprofile_min_interval = hwsampler_query_min_interval(); - if (oprofile_min_interval == 0) - return -ENODEV; - oprofile_max_interval = hwsampler_query_max_interval(); - if (oprofile_max_interval == 0) - return -ENODEV; - - /* The initial value should be sane */ - if (oprofile_hw_interval < oprofile_min_interval) - oprofile_hw_interval = oprofile_min_interval; - if (oprofile_hw_interval > oprofile_max_interval) - oprofile_hw_interval = oprofile_max_interval; - - printk(KERN_INFO "oprofile: System z hardware sampling " - "facility found.\n"); - - ops->start = oprofile_hwsampler_start; - ops->stop = oprofile_hwsampler_stop; - - return 0; -} - -static void oprofile_hwsampler_exit(void) -{ - hwsampler_shutdown(); -} static int __s390_backtrace(void *data, unsigned long address) { @@ -514,18 +34,9 @@ static void s390_backtrace(struct pt_regs *regs, unsigned int depth) int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; - - /* - * -ENODEV is not reported to the caller. The module itself - * will use the timer mode sampling as fallback and this is - * always available. - */ - hwsampler_available = oprofile_hwsampler_init(ops) == 0; - return 0; } void oprofile_arch_exit(void) { - oprofile_hwsampler_exit(); } diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h deleted file mode 100644 index 61b2531eef17..000000000000 --- a/arch/s390/oprofile/op_counter.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright IBM Corp. 2011 - * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) - * - * @remark Copyright 2011 OProfile authors - */ - -#ifndef OP_COUNTER_H -#define OP_COUNTER_H - -struct op_counter_config { - /* `enabled' maps to the hwsampler_file variable. */ - /* `count' maps to the oprofile_hw_interval variable. */ - /* `event' and `unit_mask' are unused. 
*/ - unsigned long kernel; - unsigned long user; -}; - -extern struct op_counter_config counter_config; - -#endif /* OP_COUNTER_H */ diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 1ea8c07eab84..070f1ae5cfad 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -226,7 +226,8 @@ static unsigned long __dma_alloc_iommu(struct device *dev, boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, - start, size, 0, boundary_size, 0); + start, size, zdev->start_dma >> PAGE_SHIFT, + boundary_size, 0); } static unsigned long dma_alloc_iommu(struct device *dev, int size) @@ -469,6 +470,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) * Also set zdev->end_dma to the actual end address of the usable * range, instead of the theoretical maximum as reported by hardware. */ + zdev->start_dma = PAGE_ALIGN(zdev->start_dma); zdev->iommu_size = min3((u64) high_memory, ZPCI_TABLE_SIZE_RT - zdev->start_dma, zdev->end_dma - zdev->start_dma + 1); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index fb2a9a560fdc..c2b27ad8e94d 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -145,8 +145,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } - if (pdev) - pci_dev_put(pdev); + pci_dev_put(pdev); } void zpci_event_availability(void *data) diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 10ca15dcab11..fa8d7d4b9751 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -99,7 +99,7 @@ void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) } /* PCI Load */ -static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status) { register u64 __req asm("2") = req; register u64 __offset asm("3") = offset; @@ -116,6 +116,16 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) : "d" (__offset) : "cc"); *status = __req >> 24 & 0xff; + *data = __data; + return cc; +} + +static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +{ + u64 __data; + int cc; + + cc = ____pcilg(&__data, req, offset, status); if (!cc) *data = __data; diff --git a/arch/score/include/asm/pgalloc.h b/arch/score/include/asm/pgalloc.h index 2e067657db98..49b012d78c1a 100644 --- a/arch/score/include/asm/pgalloc.h +++ b/arch/score/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, - PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -53,7 +52,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (!pte) return NULL; clear_highpage(pte); diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 37a6c2e0e969..995b71e4db4b 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -111,7 +111,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index e803a836cb7c..0d5f3a9bb315 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -264,7 +264,6 @@ config CPU_SUBTYPE_SH7203 select CPU_HAS_FPU select SYS_SUPPORTS_SH_CMT select SYS_SUPPORTS_SH_MTU2 - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL config CPU_SUBTYPE_SH7206 @@ -353,7 +352,6 @@ config CPU_SUBTYPE_SH7720 select CPU_SH3 select CPU_HAS_DSP select SYS_SUPPORTS_SH_CMT - select ARCH_WANT_OPTIONAL_GPIOLIB select USB_OHCI_SH if USB_OHCI_HCD select PINCTRL help @@ -419,7 +417,6 @@ config CPU_SUBTYPE_SH7723 select ARCH_SHMOBILE select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_SH_CMT - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL help Select SH7723 if you have an SH-MobileR2 CPU. @@ -431,7 +428,6 @@ config CPU_SUBTYPE_SH7724 select ARCH_SHMOBILE select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_SH_CMT - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL help Select SH7724 if you have an SH-MobileR2R CPU. @@ -440,7 +436,6 @@ config CPU_SUBTYPE_SH7734 bool "Support SH7734 processor" select CPU_SH4A select CPU_SHX2 - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL help Select SH7734 if you have a SH4A SH7734 CPU. @@ -449,7 +444,6 @@ config CPU_SUBTYPE_SH7757 bool "Support SH7757 processor" select CPU_SH4A select CPU_SHX2 - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL help Select SH7757 if you have a SH4A SH7757 CPU. @@ -475,7 +469,6 @@ config CPU_SUBTYPE_SH7785 select CPU_SHX2 select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_NUMA - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL config CPU_SUBTYPE_SH7786 @@ -484,7 +477,6 @@ config CPU_SUBTYPE_SH7786 select CPU_SHX3 select CPU_HAS_PTEAEX select GENERIC_CLOCKEVENTS_BROADCAST if SMP - select ARCH_WANT_OPTIONAL_GPIOLIB select USB_OHCI_SH if USB_OHCI_HCD select USB_EHCI_SH if USB_EHCI_HCD select PINCTRL @@ -494,7 +486,7 @@ config CPU_SUBTYPE_SHX3 select CPU_SH4A select CPU_SHX3 select GENERIC_CLOCKEVENTS_BROADCAST if SMP - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select PINCTRL # SH4AL-DSP Processor Support @@ -513,7 +505,6 @@ config CPU_SUBTYPE_SH7722 select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_NUMA select SYS_SUPPORTS_SH_CMT - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL config CPU_SUBTYPE_SH7366 diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig index 5e52d5362292..e0db04664e2e 100644 --- a/arch/sh/boards/Kconfig +++ b/arch/sh/boards/Kconfig @@ -70,7 +70,7 @@ config SH_7724_SOLUTION_ENGINE bool "SolutionEngine7724" select SOLUTION_ENGINE depends on CPU_SUBTYPE_SH7724 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select SND_SOC_AK4642 if SND_SIMPLE_CARD select REGULATOR_FIXED_VOLTAGE if REGULATOR help @@ -174,7 +174,6 @@ config SH_SDK7786 depends on CPU_SUBTYPE_SH7786 select SYS_SUPPORTS_PCI select NO_IOPORT_MAP if !PCI - select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_SRAM_POOL select REGULATOR_FIXED_VOLTAGE if REGULATOR help @@ -190,7 +189,7 @@ config SH_HIGHLANDER config SH_SH7757LCR bool "SH7757LCR" depends on CPU_SUBTYPE_SH7757 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select REGULATOR_FIXED_VOLTAGE if REGULATOR config SH_SH7785LCR @@ -217,14 +216,14 @@ config SH_SH7785LCR_PT config SH_URQUELL bool "Urquell" depends on CPU_SUBTYPE_SH7786 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select SYS_SUPPORTS_PCI select NO_IOPORT_MAP if !PCI config SH_MIGOR bool "Migo-R" 
depends on CPU_SUBTYPE_SH7722 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select REGULATOR_FIXED_VOLTAGE if REGULATOR help Select Migo-R if configuring for the SH7722 Migo-R platform @@ -233,7 +232,7 @@ config SH_MIGOR config SH_AP325RXA bool "AP-325RXA" depends on CPU_SUBTYPE_SH7723 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select REGULATOR_FIXED_VOLTAGE if REGULATOR help Renesas "AP-325RXA" support. @@ -242,7 +241,7 @@ config SH_AP325RXA config SH_KFR2R09 bool "KFR2R09" depends on CPU_SUBTYPE_SH7724 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select REGULATOR_FIXED_VOLTAGE if REGULATOR help "Kit For R2R for 2009" support. @@ -250,7 +249,7 @@ config SH_KFR2R09 config SH_ECOVEC bool "EcoVec" depends on CPU_SUBTYPE_SH7724 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select SND_SOC_DA7210 if SND_SIMPLE_CARD select REGULATOR_FIXED_VOLTAGE if REGULATOR help @@ -327,7 +326,7 @@ config SH_X3PROTO config SH_MAGIC_PANEL_R2 bool "Magic Panel R2" depends on CPU_SUBTYPE_SH7720 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select REGULATOR_FIXED_VOLTAGE if REGULATOR help Select Magic Panel R2 if configuring for Magic Panel R2. diff --git a/arch/sh/boards/mach-highlander/Kconfig b/arch/sh/boards/mach-highlander/Kconfig index def49cc0a7b9..42f5589b4bf3 100644 --- a/arch/sh/boards/mach-highlander/Kconfig +++ b/arch/sh/boards/mach-highlander/Kconfig @@ -18,7 +18,7 @@ config SH_R7780MP config SH_R7785RP bool "R7785RP board support" depends on CPU_SUBTYPE_SH7785 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB endchoice diff --git a/arch/sh/boards/mach-rsk/Kconfig b/arch/sh/boards/mach-rsk/Kconfig index 458a11ffd022..0b9b2c4952c1 100644 --- a/arch/sh/boards/mach-rsk/Kconfig +++ b/arch/sh/boards/mach-rsk/Kconfig @@ -10,17 +10,17 @@ config SH_RSK7201 config SH_RSK7203 bool "RSK7203" - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB depends on CPU_SUBTYPE_SH7203 config SH_RSK7264 bool "RSK2+SH7264" - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB depends on CPU_SUBTYPE_SH7264 config SH_RSK7269 bool "RSK2+SH7269" - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB depends on CPU_SUBTYPE_SH7269 endchoice diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h index b94df40e5f2d..d755e96c3064 100644 --- a/arch/sh/include/asm/atomic-grb.h +++ b/arch/sh/include/asm/atomic-grb.h @@ -43,16 +43,42 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return tmp; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int res, tmp; \ + \ + __asm__ __volatile__ ( \ + " .align 2 \n\t" \ + " mova 1f, r0 \n\t" /* r0 = end point */ \ + " mov r15, r1 \n\t" /* r1 = saved sp */ \ + " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ \ + " mov.l @%2, %0 \n\t" /* load old value */ \ + " mov %0, %1 \n\t" /* save old value */ \ + " " #op " %3, %0 \n\t" /* $op */ \ + " mov.l %0, @%2 \n\t" /* store new value */ \ + "1: mov r1, r15 \n\t" /* LOGOUT */ \ + : "=&r" (tmp), "=&r" (res), "+r" (v) \ + : "r" (i) \ + : "memory" , "r0", "r1"); \ + \ + return res; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h 
index 23fcdad5773e..8e2da5fa0178 100644 --- a/arch/sh/include/asm/atomic-irq.h +++ b/arch/sh/include/asm/atomic-irq.h @@ -33,15 +33,38 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long temp, flags; \ + \ + raw_local_irq_save(flags); \ + temp = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return temp; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) #undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index 33d34b16d4d6..caea2c45f6c2 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -48,15 +48,39 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long res, temp; \ + \ + __asm__ __volatile__ ( \ +"1: movli.l @%3, %0 ! atomic_fetch_" #op " \n" \ +" mov %0, %1 \n" \ +" " #op " %2, %0 \n" \ +" movco.l %0, @%3 \n" \ +" bf 1b \n" \ +" synco \n" \ + : "=&z" (temp), "=&z" (res) \ + : "r" (i), "r" (&v->counter) \ + : "t"); \ + \ + return res; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index a33673b3687d..f3f42c84c40f 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -34,7 +34,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -43,7 +43,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, struct page *page; void *pg; - pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); if (!pg) return NULL; page = virt_to_page(pg); diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h index bdc0f3b6c56a..416834b60ad0 100644 --- a/arch/sh/include/asm/spinlock.h +++ b/arch/sh/include/asm/spinlock.h @@ -19,14 +19,20 @@ #error "Need movli.l/movco.l for spinlocks" #endif +#include +#include + /* * Your basic SMP spinlocks, allowing only a single CPU anywhere */ #define arch_spin_is_locked(x) ((x)->lock <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) + 
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, VAL > 0); +} /* * Simple spin lock operations. There are two variants, one clears IRQ's diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 62f80d2a9df9..025cdb1032f6 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -101,7 +101,7 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { free_page_and_swap_cache(page); - return 1; /* avoid calling tlb_flush_mmu */ + return false; /* avoid calling tlb_flush_mmu */ } static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) @@ -109,6 +109,24 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) __tlb_remove_page(tlb, page); } +static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return __tlb_remove_page(tlb, page); +} + +static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, + struct page *page) +{ + return __tlb_remove_page(tlb, page); +} + +static inline void tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return tlb_remove_page(tlb, page); +} + #define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep) #define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp) #define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 79d8276377d1..9bf876780cef 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -487,7 +487,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR))) if (mm_fault_error(regs, error_code, address, fault)) diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c index 26e03a1f7ca4..a62bd8696779 100644 --- a/arch/sh/mm/pgtable.c +++ b/arch/sh/mm/pgtable.c @@ -1,7 +1,7 @@ #include #include -#define PGALLOC_GFP GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_ZERO static struct kmem_cache *pgd_cachep; #if PAGETABLE_LEVELS > 2 diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 7dcbebbcaec6..ee3f11c43cda 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -20,9 +20,10 @@ #define ATOMIC_INIT(i) { (i) } int atomic_add_return(int, atomic_t *); -void atomic_and(int, atomic_t *); -void atomic_or(int, atomic_t *); -void atomic_xor(int, atomic_t *); +int atomic_fetch_add(int, atomic_t *); +int atomic_fetch_and(int, atomic_t *); +int atomic_fetch_or(int, atomic_t *); +int atomic_fetch_xor(int, atomic_t *); int atomic_cmpxchg(atomic_t *, int, int); int atomic_xchg(atomic_t *, int); int __atomic_add_unless(atomic_t *, int, int); @@ -35,7 +36,13 @@ void atomic_set(atomic_t *, int); #define atomic_inc(v) ((void)atomic_add_return( 1, (v))) #define atomic_dec(v) ((void)atomic_add_return( -1, (v))) +#define atomic_and(i, v) ((void)atomic_fetch_and((i), (v))) +#define atomic_or(i, v) ((void)atomic_fetch_or((i), (v))) +#define atomic_xor(i, v) ((void)atomic_fetch_xor((i), (v))) + #define atomic_sub_return(i, v) (atomic_add_return(-(int)(i), (v))) +#define atomic_fetch_sub(i, v) (atomic_fetch_add (-(int)(i), (v))) + #define atomic_inc_return(v) (atomic_add_return( 1, (v))) #define atomic_dec_return(v) 
(atomic_add_return( -1, (v))) diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index f2fbf9e16faf..24827a3f733a 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -28,16 +28,24 @@ void atomic64_##op(long, atomic64_t *); int atomic_##op##_return(int, atomic_t *); \ long atomic64_##op##_return(long, atomic64_t *); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +int atomic_fetch_##op(int, atomic_t *); \ +long atomic64_fetch_##op(long, atomic64_t *); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 5e3187185b4a..3529f1378cd8 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -41,8 +41,7 @@ static inline void __pud_populate(pud_t *pud, pmd_t *pmd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(pgtable_cache, - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -52,8 +51,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(pgtable_cache, - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index bcc98fc35281..d9c5876c6121 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -9,12 +9,15 @@ #ifndef __ASSEMBLY__ #include +#include #include /* for cpu_relax */ #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline void arch_spin_lock(arch_spinlock_t *lock) { diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 968917694978..87990b7c6b0d 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -8,6 +8,9 @@ #ifndef __ASSEMBLY__ +#include +#include + /* To get debugging spinlocks which detect and catch * deadlock situations, set CONFIG_DEBUG_SPINLOCK * and rebuild your kernel. 
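For reference, the smp_cond_load_acquire() helper that these arch_spin_unlock_wait() conversions rely on spins on a READ_ONCE() of the location until the condition, written in terms of the magic VAL token, becomes true, then upgrades the control dependency to an acquire. A sketch of the generic helper, roughly as it reads in this cycle's <linux/compiler.h>:

    #define smp_cond_load_acquire(ptr, cond_expr) ({	\
    	typeof(ptr) __PTR = (ptr);			\
    	typeof(*ptr) VAL;				\
    	for (;;) {					\
    		VAL = READ_ONCE(*__PTR);		\
    		if (cond_expr)				\
    			break;				\
    		cpu_relax();				\
    	}						\
    	smp_acquire__after_ctrl_dep();			\
    	VAL;						\
    })

So smp_cond_load_acquire(&lock->lock, !VAL) is the old "spin until lock->lock reads as zero" loop plus the acquire ordering that the open-coded macros above lacked.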
@@ -23,9 +26,10 @@ #define arch_spin_is_locked(lp) ((lp)->lock != 0) -#define arch_spin_unlock_wait(lp) \ - do { rmb(); \ - } while((lp)->lock) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline void arch_spin_lock(arch_spinlock_t *lock) { diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index e22416ce56ea..34a7930b76ef 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -242,7 +242,7 @@ unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino) { int irq; - irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL); + irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL, NULL); if (irq <= 0) goto out; diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index b9d63c0a7aab..2c373329d5cb 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -27,39 +27,44 @@ static DEFINE_SPINLOCK(dummy); #endif /* SMP */ -#define ATOMIC_OP_RETURN(op, c_op) \ -int atomic_##op##_return(int i, atomic_t *v) \ +#define ATOMIC_FETCH_OP(op, c_op) \ +int atomic_fetch_##op(int i, atomic_t *v) \ { \ int ret; \ unsigned long flags; \ spin_lock_irqsave(ATOMIC_HASH(v), flags); \ \ - ret = (v->counter c_op i); \ + ret = v->counter; \ + v->counter c_op i; \ \ spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ return ret; \ } \ -EXPORT_SYMBOL(atomic_##op##_return); +EXPORT_SYMBOL(atomic_fetch_##op); -#define ATOMIC_OP(op, c_op) \ -void atomic_##op(int i, atomic_t *v) \ +#define ATOMIC_OP_RETURN(op, c_op) \ +int atomic_##op##_return(int i, atomic_t *v) \ { \ + int ret; \ unsigned long flags; \ spin_lock_irqsave(ATOMIC_HASH(v), flags); \ \ - v->counter c_op i; \ + ret = (v->counter c_op i); \ \ spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ + return ret; \ } \ -EXPORT_SYMBOL(atomic_##op); +EXPORT_SYMBOL(atomic_##op##_return); ATOMIC_OP_RETURN(add, +=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +ATOMIC_FETCH_OP(add, +=) +ATOMIC_FETCH_OP(and, &=) +ATOMIC_FETCH_OP(or, |=) +ATOMIC_FETCH_OP(xor, ^=) + +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN -#undef ATOMIC_OP int atomic_xchg(atomic_t *v, int new) { diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index d6b0363f345b..a5c5a0279ccc 100644 --- a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -9,10 +9,11 @@ .text - /* Two versions of the atomic routines, one that + /* Three versions of the atomic routines, one that * does not return a value and does not perform - * memory barriers, and a second which returns - * a value and does the barriers. + * memory barriers, and a two which return + * a value, the new and old value resp. and does the + * barriers. 
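The naming rule this comment alludes to: atomic_<op>_return() hands back the post-operation value, while the new atomic_fetch_<op>() hands back the pre-operation value. A worked example with hypothetical values:

    atomic_t v = ATOMIC_INIT(3);

    int newval = atomic_add_return(2, &v);	/* newval == 5, v is now 5 */
    int oldval = atomic_fetch_add(2, &v);	/* oldval == 5, v is now 7 */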
*/ #define ATOMIC_OP(op) \ @@ -43,15 +44,34 @@ ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ 2: BACKOFF_SPIN(%o2, %o3, 1b); \ ENDPROC(atomic_##op##_return); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: lduw [%o1], %g1; \ + op %g1, %o0, %g7; \ + cas [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %icc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + sra %g1, 0, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic_fetch_##op); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -83,15 +103,34 @@ ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ 2: BACKOFF_SPIN(%o2, %o3, 1b); \ ENDPROC(atomic64_##op##_return); -#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) +#define ATOMIC64_FETCH_OP(op) \ +ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: ldx [%o1], %g1; \ + op %g1, %o0, %g7; \ + casx [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + mov %g1, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic64_fetch_##op); + +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op) ATOMIC64_OPS(add) ATOMIC64_OPS(sub) -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op) + +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 8eb454cfe05c..de5e97817bdb 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -107,15 +107,24 @@ EXPORT_SYMBOL(atomic64_##op); EXPORT_SYMBOL(atomic_##op##_return); \ EXPORT_SYMBOL(atomic64_##op##_return); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +EXPORT_SYMBOL(atomic_fetch_##op); \ +EXPORT_SYMBOL(atomic64_fetch_##op); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index b6c559cbd64d..4714061d6cd3 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -241,7 +241,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
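The handle_mm_fault() change repeated across these fault handlers is purely mechanical: the mm argument was redundant because the core can always recover it from the VMA. Side by side:

    /* old */ fault = handle_mm_fault(mm, vma, address, flags);
    /* new */ fault = handle_mm_fault(vma, address, flags);	/* mm is vma->vm_mm */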
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; @@ -411,7 +411,7 @@ good_area: if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - switch (handle_mm_fault(mm, vma, address, flags)) { + switch (handle_mm_fault(vma, address, flags)) { case VM_FAULT_SIGBUS: case VM_FAULT_OOM: goto do_sigbus; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index cb841a33da59..6c43b924a7a2 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -436,7 +436,7 @@ good_area: goto bad_area; } - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) goto exit_exception; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 14bb0d5ed3c6..aec508e37490 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2704,8 +2704,7 @@ void __flush_tlb_all(void) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); pte_t *pte = NULL; if (page) @@ -2717,8 +2716,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); if (!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 9fc0107a9c5e..8dda3c8ff5ab 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -46,6 +46,8 @@ static inline int atomic_read(const atomic_t *v) */ #define atomic_sub_return(i, v) atomic_add_return((int)(-(i)), (v)) +#define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v)) + /** * atomic_sub - subtract integer from atomic variable * @i: integer value to subtract diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index d320ce253d86..a93774255136 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -34,18 +34,29 @@ static inline void atomic_add(int i, atomic_t *v) _atomic_xchg_add(&v->counter, i); } -#define ATOMIC_OP(op) \ -unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \ +#define ATOMIC_OPS(op) \ +unsigned long _atomic_fetch_##op(volatile unsigned long *p, unsigned long mask); \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - _atomic_##op((unsigned long *)&v->counter, i); \ + _atomic_fetch_##op((unsigned long *)&v->counter, i); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + smp_mb(); \ + return _atomic_fetch_##op((unsigned long *)&v->counter, i); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) -#undef ATOMIC_OP +#undef ATOMIC_OPS + +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + smp_mb(); + return _atomic_xchg_add(&v->counter, i); +} /** * atomic_add_return - add integer and return @@ -126,16 +137,29 @@ static inline void atomic64_add(long long i, atomic64_t *v) _atomic64_xchg_add(&v->counter, i); } -#define ATOMIC64_OP(op) \ -long long _atomic64_##op(long long *v, long long n); \ +#define ATOMIC64_OPS(op) \ +long long 
_atomic64_fetch_##op(long long *v, long long n); \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ - _atomic64_##op(&v->counter, i); \ + _atomic64_fetch_##op(&v->counter, i); \ +} \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + smp_mb(); \ + return _atomic64_fetch_##op(&v->counter, i); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) + +#undef ATOMIC64_OPS + +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + smp_mb(); + return _atomic64_xchg_add(&v->counter, i); +} /** * atomic64_add_return - add integer and return @@ -186,6 +210,7 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_dec(v) atomic64_sub(1LL, (v)) @@ -193,7 +218,6 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) - #endif /* !__ASSEMBLY__ */ /* @@ -242,16 +266,16 @@ struct __get_user { unsigned long val; int err; }; -extern struct __get_user __atomic_cmpxchg(volatile int *p, +extern struct __get_user __atomic32_cmpxchg(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add_unless(volatile int *p, +extern struct __get_user __atomic32_xchg(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add_unless(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_or(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_and(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_or(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_and(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_andn(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_xor(volatile int *p, int *lock, int n); extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, long long o, long long n); extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); @@ -259,9 +283,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock, long long n); extern long long __atomic64_xchg_add_unless(volatile long long *p, int *lock, long long o, long long n); -extern long long __atomic64_and(volatile long long *p, int *lock, long long n); -extern long long __atomic64_or(volatile long long *p, int *lock, long long n); -extern long long __atomic64_xor(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_and(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_or(volatile long long *p, int *lock, long long n); 
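Expanding ATOMIC_OPS(or) from the tile 32-bit hunk above makes the shape of the macro concrete: the void-returning form calls the out-of-line helper directly, and the value-returning form issues the leading full barrier itself:

    unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask);

    static inline void atomic_or(int i, atomic_t *v)
    {
    	_atomic_fetch_or((unsigned long *)&v->counter, i);
    }

    static inline int atomic_fetch_or(int i, atomic_t *v)
    {
    	smp_mb();
    	return _atomic_fetch_or((unsigned long *)&v->counter, i);
    }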
+extern long long __atomic64_fetch_xor(volatile long long *p, int *lock, long long n); /* Return failure from the atomic wrappers. */ struct __get_user __atomic_bad_address(int __user *addr); diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index b0531a623653..4cefa0c9fd81 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -32,11 +32,6 @@ * on any routine which updates memory and returns a value. */ -static inline void atomic_add(int i, atomic_t *v) -{ - __insn_fetchadd4((void *)&v->counter, i); -} - /* * Note a subtlety of the locking here. We are required to provide a * full memory barrier before and after the operation. However, we @@ -59,28 +54,39 @@ static inline int atomic_add_return(int i, atomic_t *v) return val; } -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +#define ATOMIC_OPS(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int val; \ + smp_mb(); \ + val = __insn_fetch##op##4((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + __insn_fetch##op##4((void *)&v->counter, i); \ +} + +ATOMIC_OPS(add) +ATOMIC_OPS(and) +ATOMIC_OPS(or) + +#undef ATOMIC_OPS + +static inline int atomic_fetch_xor(int i, atomic_t *v) { int guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch4(&v->counter, guess ^ i); } while (guess != oldval); + smp_mb(); return oldval; } -static inline void atomic_and(int i, atomic_t *v) -{ - __insn_fetchand4((void *)&v->counter, i); -} - -static inline void atomic_or(int i, atomic_t *v) -{ - __insn_fetchor4((void *)&v->counter, i); -} - static inline void atomic_xor(int i, atomic_t *v) { int guess, oldval = v->counter; @@ -91,6 +97,18 @@ static inline void atomic_xor(int i, atomic_t *v) } while (guess != oldval); } +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval; +} + /* Now the true 64-bit operations. 
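Two details of the tilegx atomic_fetch_xor() loop above are worth spelling out: the cmpexch instruction takes its comparison value from the SPR_CMPEXCH_VALUE special-purpose register, hence the __insn_mtspr() before every attempt, and the explicit smp_mb() calls supply the full ordering that value-returning atomics must provide. In portable terms the loop is the standard cmpxchg idiom (a sketch, not the tile implementation):

    static inline int fetch_xor_sketch(int i, atomic_t *v)
    {
    	int old = atomic_read(v);
    	int prev;

    	for (;;) {
    		prev = cmpxchg(&v->counter, old, old ^ i);
    		if (prev == old)
    			return old;	/* fetch_* returns the pre-op value */
    		old = prev;
    	}
    }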
*/ #define ATOMIC64_INIT(i) { (i) } @@ -98,11 +116,6 @@ static inline void atomic_xor(int i, atomic_t *v) #define atomic64_read(v) READ_ONCE((v)->counter) #define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) -static inline void atomic64_add(long i, atomic64_t *v) -{ - __insn_fetchadd((void *)&v->counter, i); -} - static inline long atomic64_add_return(long i, atomic64_t *v) { int val; @@ -112,26 +125,37 @@ static inline long atomic64_add_return(long i, atomic64_t *v) return val; } -static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +#define ATOMIC64_OPS(op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long val; \ + smp_mb(); \ + val = __insn_fetch##op((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + __insn_fetch##op((void *)&v->counter, i); \ +} + +ATOMIC64_OPS(add) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) + +#undef ATOMIC64_OPS + +static inline long atomic64_fetch_xor(long i, atomic64_t *v) { long guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch(&v->counter, guess ^ i); } while (guess != oldval); - return oldval != u; -} - -static inline void atomic64_and(long i, atomic64_t *v) -{ - __insn_fetchand((void *)&v->counter, i); -} - -static inline void atomic64_or(long i, atomic64_t *v) -{ - __insn_fetchor((void *)&v->counter, i); + smp_mb(); + return oldval; } static inline void atomic64_xor(long i, atomic64_t *v) @@ -144,7 +168,20 @@ static inline void atomic64_xor(long i, atomic64_t *v) } while (guess != oldval); } +static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval != u; +} + #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_inc_return(v) atomic64_add_return(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h index d55222806c2f..4c419ab95ab7 100644 --- a/arch/tile/include/asm/barrier.h +++ b/arch/tile/include/asm/barrier.h @@ -87,6 +87,13 @@ mb_incoherent(void) #define __smp_mb__after_atomic() __smp_mb() #endif +/* + * The TILE architecture does not do speculative reads; this ensures + * that a control dependency also orders against loads and already provides + * a LOAD->{LOAD,STORE} order and can forgo the additional RMB. + */ +#define smp_acquire__after_ctrl_dep() barrier() + #include #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h index bbf7b666f21d..d1406a95f6b7 100644 --- a/arch/tile/include/asm/bitops_32.h +++ b/arch/tile/include/asm/bitops_32.h @@ -19,9 +19,9 @@ #include /* Tile-specific routines to support . 
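The smp_acquire__after_ctrl_dep() override in the barrier.h hunk above is a relaxation of the generic fallback, which must assume loads can be speculated and therefore uses a read barrier. Roughly, in <linux/compiler.h>:

    /* generic fallback; TILE relaxes this to barrier() as shown above */
    #define smp_acquire__after_ctrl_dep()	smp_rmb()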
*/ -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask); /** * set_bit - Atomically set a bit in memory @@ -35,7 +35,7 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); */ static inline void set_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_or(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -54,7 +54,7 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr) */ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -69,7 +69,7 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) */ static inline void change_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -85,7 +85,7 @@ static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_or(addr, mask) & mask) != 0; + return (_atomic_fetch_or(addr, mask) & mask) != 0; } /** @@ -101,7 +101,7 @@ static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_andn(addr, mask) & mask) != 0; + return (_atomic_fetch_andn(addr, mask) & mask) != 0; } /** @@ -118,7 +118,7 @@ static inline int test_and_change_bit(unsigned nr, unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_xor(addr, mask) & mask) != 0; + return (_atomic_fetch_xor(addr, mask) & mask) != 0; } #include diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h index c505d77e4d06..e9d54a06736f 100644 --- a/arch/tile/include/asm/elf.h +++ b/arch/tile/include/asm/elf.h @@ -129,6 +129,7 @@ extern int dump_task_regs(struct task_struct *, elf_gregset_t *); struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ #define ARCH_DLINFO \ do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \ diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index 1a6ef1b69cb1..e64a1b75fc38 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h @@ -80,16 +80,16 @@ ret = gu.err; \ } -#define __futex_set() __futex_call(__atomic_xchg) -#define __futex_add() __futex_call(__atomic_xchg_add) -#define __futex_or() __futex_call(__atomic_or) -#define __futex_andn() __futex_call(__atomic_andn) -#define __futex_xor() __futex_call(__atomic_xor) +#define __futex_set() __futex_call(__atomic32_xchg) +#define __futex_add() __futex_call(__atomic32_xchg_add) +#define __futex_or() __futex_call(__atomic32_fetch_or) +#define __futex_andn() __futex_call(__atomic32_fetch_andn) +#define __futex_xor() __futex_call(__atomic32_fetch_xor) #define 
__futex_cmpxchg() \ { \ - struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \ - lock, oldval, oparg); \ + struct __get_user gu = __atomic32_cmpxchg((u32 __force *)uaddr, \ + lock, oldval, oparg); \ val = gu.val; \ ret = gu.err; \ } diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h index e98909033e5b..2a0347af0702 100644 --- a/arch/tile/include/asm/setup.h +++ b/arch/tile/include/asm/setup.h @@ -25,7 +25,12 @@ #define MAXMEM_PFN PFN_DOWN(MAXMEM) int tile_console_write(const char *buf, int count); + +#ifdef CONFIG_EARLY_PRINTK void early_panic(const char *fmt, ...); +#else +#define early_panic panic +#endif /* Init-time routine to do tile-specific per-cpu setup. */ void setup_cpu(int boot); diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 4b7cef9e94e0..c1467ac59ce6 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -78,7 +78,7 @@ struct thread_info { #ifndef __ASSEMBLY__ -void arch_release_thread_info(struct thread_info *info); +void arch_release_thread_stack(unsigned long *stack); /* How to get the thread information struct from C. */ register unsigned long stack_pointer __asm__("sp"); diff --git a/arch/tile/include/uapi/asm/auxvec.h b/arch/tile/include/uapi/asm/auxvec.h index c93e92709f14..f497123ed980 100644 --- a/arch/tile/include/uapi/asm/auxvec.h +++ b/arch/tile/include/uapi/asm/auxvec.h @@ -18,4 +18,6 @@ /* The vDSO location. */ #define AT_SYSINFO_EHDR 33 +#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */ + #endif /* _ASM_TILE_AUXVEC_H */ diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 49120843ff96..bdaf71d31a4a 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -23,42 +23,50 @@ #include #include #include +#include /* * Syscalls that take 64-bit numbers traditionally take them in 32-bit * "high" and "low" value parts on 32-bit architectures. * In principle, one could imagine passing some register arguments as * fully 64-bit on TILE-Gx in 32-bit mode, but it seems easier to - * adapt the usual convention. + * adopt the usual convention. 
*/ +#ifdef __BIG_ENDIAN +#define SYSCALL_PAIR(name) u32, name ## _hi, u32, name ## _lo +#else +#define SYSCALL_PAIR(name) u32, name ## _lo, u32, name ## _hi +#endif + COMPAT_SYSCALL_DEFINE4(truncate64, char __user *, filename, u32, dummy, - u32, low, u32, high) + SYSCALL_PAIR(length)) { - return sys_truncate(filename, ((loff_t)high << 32) | low); + return sys_truncate(filename, ((loff_t)length_hi << 32) | length_lo); } COMPAT_SYSCALL_DEFINE4(ftruncate64, unsigned int, fd, u32, dummy, - u32, low, u32, high) + SYSCALL_PAIR(length)) { - return sys_ftruncate(fd, ((loff_t)high << 32) | low); + return sys_ftruncate(fd, ((loff_t)length_hi << 32) | length_lo); } COMPAT_SYSCALL_DEFINE6(pread64, unsigned int, fd, char __user *, ubuf, - size_t, count, u32, dummy, u32, low, u32, high) + size_t, count, u32, dummy, SYSCALL_PAIR(offset)) { - return sys_pread64(fd, ubuf, count, ((loff_t)high << 32) | low); + return sys_pread64(fd, ubuf, count, + ((loff_t)offset_hi << 32) | offset_lo); } COMPAT_SYSCALL_DEFINE6(pwrite64, unsigned int, fd, char __user *, ubuf, - size_t, count, u32, dummy, u32, low, u32, high) + size_t, count, u32, dummy, SYSCALL_PAIR(offset)) { - return sys_pwrite64(fd, ubuf, count, ((loff_t)high << 32) | low); + return sys_pwrite64(fd, ubuf, count, + ((loff_t)offset_hi << 32) | offset_lo); } COMPAT_SYSCALL_DEFINE6(sync_file_range2, int, fd, unsigned int, flags, - u32, offset_lo, u32, offset_hi, - u32, nbytes_lo, u32, nbytes_hi) + SYSCALL_PAIR(offset), SYSCALL_PAIR(nbytes)) { return sys_sync_file_range(fd, ((loff_t)offset_hi << 32) | offset_lo, ((loff_t)nbytes_hi << 32) | nbytes_lo, @@ -66,8 +74,7 @@ COMPAT_SYSCALL_DEFINE6(sync_file_range2, int, fd, unsigned int, flags, } COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, - u32, offset_lo, u32, offset_hi, - u32, len_lo, u32, len_hi) + SYSCALL_PAIR(offset), SYSCALL_PAIR(len)) { return sys_fallocate(fd, mode, ((loff_t)offset_hi << 32) | offset_lo, ((loff_t)len_hi << 32) | len_lo); @@ -77,6 +84,8 @@ COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, * Avoid bug in generic sys_llseek() that specifies offset_high and * offset_low as "unsigned long", thus making it possible to pass * a sign-extended high 32 bits in offset_low. + * Note that we do not use SYSCALL_PAIR here since glibc passes the + * high and low parts explicitly in that order. 
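Expanding SYSCALL_PAIR shows what actually changed in the ABI plumbing; for instance, the pread64 definition above becomes, in the big-endian case:

    COMPAT_SYSCALL_DEFINE6(pread64, unsigned int, fd, char __user *, ubuf,
    		       size_t, count, u32, dummy,
    		       u32, offset_hi, u32, offset_lo)	/* hi word first on BE */
    {
    	return sys_pread64(fd, ubuf, count,
    			   ((loff_t)offset_hi << 32) | offset_lo);
    }

Only the parameter order is endian-dependent; the reassembly expression is identical on both endiannesses.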
*/ COMPAT_SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned int, offset_high, unsigned int, offset_low, loff_t __user *, result, diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 6b705ccc9cc1..a465d8372edd 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -73,8 +73,9 @@ void arch_cpu_idle(void) /* * Release a thread_info structure */ -void arch_release_thread_info(struct thread_info *info) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *info = (void *)stack; struct single_step_state *step_state = info->step_state; if (step_state) { diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c index 38debe706061..c7418dcbbb08 100644 --- a/arch/tile/kernel/sys.c +++ b/arch/tile/kernel/sys.c @@ -33,6 +33,7 @@ #include #include #include +#include #include SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, len, @@ -59,13 +60,19 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, len, #if !defined(__tilegx__) || defined(CONFIG_COMPAT) -ssize_t sys32_readahead(int fd, u32 offset_lo, u32 offset_hi, u32 count) +#ifdef __BIG_ENDIAN +#define SYSCALL_PAIR(name) u32 name ## _hi, u32 name ## _lo +#else +#define SYSCALL_PAIR(name) u32 name ## _lo, u32 name ## _hi +#endif + +ssize_t sys32_readahead(int fd, SYSCALL_PAIR(offset), u32 count) { return sys_readahead(fd, ((loff_t)offset_hi << 32) | offset_lo, count); } -int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi, - u32 len_lo, u32 len_hi, int advice) +int sys32_fadvise64_64(int fd, SYSCALL_PAIR(offset), + SYSCALL_PAIR(len), int advice) { return sys_fadvise64_64(fd, ((loff_t)offset_hi << 32) | offset_lo, ((loff_t)len_hi << 32) | len_lo, advice); diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 298df1e9912a..f8128800dbf5 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -61,13 +61,13 @@ static inline int *__atomic_setup(volatile void *v) int _atomic_xchg(int *v, int n) { - return __atomic_xchg(v, __atomic_setup(v), n).val; + return __atomic32_xchg(v, __atomic_setup(v), n).val; } EXPORT_SYMBOL(_atomic_xchg); int _atomic_xchg_add(int *v, int i) { - return __atomic_xchg_add(v, __atomic_setup(v), i).val; + return __atomic32_xchg_add(v, __atomic_setup(v), i).val; } EXPORT_SYMBOL(_atomic_xchg_add); @@ -78,39 +78,39 @@ int _atomic_xchg_add_unless(int *v, int a, int u) * to use the first argument consistently as the "old value" * in the assembly, as is done for _atomic_cmpxchg(). 
*/ - return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val; + return __atomic32_xchg_add_unless(v, __atomic_setup(v), u, a).val; } EXPORT_SYMBOL(_atomic_xchg_add_unless); int _atomic_cmpxchg(int *v, int o, int n) { - return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val; + return __atomic32_cmpxchg(v, __atomic_setup(v), o, n).val; } EXPORT_SYMBOL(_atomic_cmpxchg); -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask) { - return __atomic_or((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_or((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_or); +EXPORT_SYMBOL(_atomic_fetch_or); -unsigned long _atomic_and(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask) { - return __atomic_and((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_and((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_and); +EXPORT_SYMBOL(_atomic_fetch_and); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask) { - return __atomic_andn((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_andn((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_andn); +EXPORT_SYMBOL(_atomic_fetch_andn); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask) { - return __atomic_xor((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_xor((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_xor); +EXPORT_SYMBOL(_atomic_fetch_xor); long long _atomic64_xchg(long long *v, long long n) @@ -142,23 +142,23 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n) } EXPORT_SYMBOL(_atomic64_cmpxchg); -long long _atomic64_and(long long *v, long long n) +long long _atomic64_fetch_and(long long *v, long long n) { - return __atomic64_and(v, __atomic_setup(v), n); + return __atomic64_fetch_and(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_and); +EXPORT_SYMBOL(_atomic64_fetch_and); -long long _atomic64_or(long long *v, long long n) +long long _atomic64_fetch_or(long long *v, long long n) { - return __atomic64_or(v, __atomic_setup(v), n); + return __atomic64_fetch_or(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_or); +EXPORT_SYMBOL(_atomic64_fetch_or); -long long _atomic64_xor(long long *v, long long n) +long long _atomic64_fetch_xor(long long *v, long long n) { - return __atomic64_xor(v, __atomic_setup(v), n); + return __atomic64_fetch_xor(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_xor); +EXPORT_SYMBOL(_atomic64_fetch_xor); /* * If any of the atomic or futex routines hit a bad address (not in diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index f611265633d6..1a70e6c0f259 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -172,15 +172,20 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic) .endif .endm -atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" -atomic_op _xchg, 32, "move r24, r2" -atomic_op _xchg_add, 32, "add r24, r22, r2" -atomic_op _xchg_add_unless, 32, \ + +/* + * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions. 
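The collision being avoided is real: GCC reserves the __atomic_* namespace for its builtins, and __atomic_fetch_or in particular is one of them, so the out-of-line kernel helpers could not keep that spelling once they grew fetch_ names. An illustration of the builtin occupying the name:

    /* compiles with no declaration in scope; __atomic_fetch_or is a GCC builtin */
    int builtin_demo(int *p, int v)
    {
    	return __atomic_fetch_or(p, v, __ATOMIC_SEQ_CST);
    }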
+ */ + +atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" +atomic_op 32_xchg, 32, "move r24, r2" +atomic_op 32_xchg_add, 32, "add r24, r22, r2" +atomic_op 32_xchg_add_unless, 32, \ "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }" -atomic_op _or, 32, "or r24, r22, r2" -atomic_op _and, 32, "and r24, r22, r2" -atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2" -atomic_op _xor, 32, "xor r24, r22, r2" +atomic_op 32_fetch_or, 32, "or r24, r22, r2" +atomic_op 32_fetch_and, 32, "and r24, r22, r2" +atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2" +atomic_op 32_fetch_xor, 32, "xor r24, r22, r2" atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \ { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" @@ -192,9 +197,9 @@ atomic_op 64_xchg_add_unless, 64, \ { bbns r26, 3f; add r24, r22, r4 }; \ { bbns r27, 3f; add r25, r23, r5 }; \ slt_u r26, r24, r22; add r25, r25, r26" -atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" -atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" -atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" +atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" +atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" +atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" jrp lr /* happy backtracer */ diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c index 9d171ca4302c..c5369fe643c7 100644 --- a/arch/tile/lib/exports.c +++ b/arch/tile/lib/exports.c @@ -77,7 +77,11 @@ uint64_t __umoddi3(uint64_t dividend, uint64_t divisor); EXPORT_SYMBOL(__umoddi3); int64_t __moddi3(int64_t dividend, int64_t divisor); EXPORT_SYMBOL(__moddi3); -#ifndef __tilegx__ +#ifdef __tilegx__ +typedef int TItype __attribute__((mode(TI))); +TItype __multi3(TItype a, TItype b); +EXPORT_SYMBOL(__multi3); /* required for gcc 7 and later */ +#else int64_t __muldi3(int64_t, int64_t); EXPORT_SYMBOL(__muldi3); uint64_t __lshrdi3(uint64_t, unsigned int); diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 88c2a53362e7..076c6cc43113 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) do { delay_backoff(iterations++); } while (READ_ONCE(lock->current_ticket) == curr); + + /* + * The TILE architecture doesn't do read speculation; therefore + * a control dependency guarantees a LOAD->{LOAD,STORE} order. + */ + barrier(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c index c8d1f94ff1fe..a4b5b2cbce93 100644 --- a/arch/tile/lib/spinlock_64.c +++ b/arch/tile/lib/spinlock_64.c @@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) do { delay_backoff(iterations++); } while (arch_spin_current(READ_ONCE(lock->lock)) == curr); + + /* + * The TILE architecture doesn't do read speculation; therefore + * a control dependency guarantees a LOAD->{LOAD,STORE} order. + */ + barrier(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 26734214818c..beba986589e5 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -434,7 +434,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
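On the __multi3 export in the exports.c hunk above: GCC 7 may emit out-of-line libgcc calls for 128-bit multiplies instead of expanding them inline, and modules then need the symbol. TItype is the attribute-mode spelling of a 128-bit integer, usable even where the __int128 keyword is not:

    typedef int TItype __attribute__((mode(TI)));	/* 128-bit integer */

    TItype mul128(TItype a, TItype b)
    {
    	return a * b;	/* may compile to a call to libgcc's __multi3() */
    }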
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return 0; diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 7bf2491a9c1f..c4d5bf841a7f 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -231,7 +231,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, int order) { - gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO; + gfp_t flags = GFP_KERNEL|__GFP_ZERO; struct page *p; int i; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 17e96dc29596..f3540270d096 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -801,6 +801,7 @@ static void ubd_device_release(struct device *dev) static int ubd_disk_register(int major, u64 size, int unit, struct gendisk **disk_out) { + struct device *parent = NULL; struct gendisk *disk; disk = alloc_disk(1 << UBD_SHIFT); @@ -823,12 +824,12 @@ static int ubd_disk_register(int major, u64 size, int unit, ubd_devs[unit].pdev.dev.release = ubd_device_release; dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]); platform_device_register(&ubd_devs[unit].pdev); - disk->driverfs_dev = &ubd_devs[unit].pdev.dev; + parent = &ubd_devs[unit].pdev.dev; } disk->private_data = &ubd_devs[unit]; disk->queue = ubd_devs[unit].queue; - add_disk(disk); + device_add_disk(parent, disk); *disk_out = disk; return 0; @@ -1286,7 +1287,7 @@ static void do_ubd_request(struct request_queue *q) req = dev->request; - if (req->cmd_flags & REQ_FLUSH) { + if (req_op(req) == REQ_OP_FLUSH) { io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); if (io_req == NULL) { diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 16eb63fac57d..821ff0acfe17 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -102,7 +102,7 @@ static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { tlb->need_flush = 1; free_page_and_swap_cache(page); - return 1; /* avoid calling tlb_flush_mmu */ + return false; /* avoid calling tlb_flush_mmu */ } static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) @@ -110,6 +110,24 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) __tlb_remove_page(tlb, page); } +static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return __tlb_remove_page(tlb, page); +} + +static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, + struct page *page) +{ + return __tlb_remove_page(tlb, page); +} + +static inline void tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return tlb_remove_page(tlb, page); +} + /** * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. 
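Two independent block-layer API changes appear in the ubd hunk above. Disk registration now takes the parent device directly, replacing the driverfs_dev assignment (add_disk(d) survives in this cycle as shorthand for device_add_disk(NULL, d)), and the request operation moved out of the bitwise-testable flag space, so flush detection becomes an equality test on req_op(). Usage sketch:

    device_add_disk(parent, disk);	/* was: disk->driverfs_dev = parent; add_disk(disk); */

    if (req_op(req) == REQ_OP_FLUSH)	/* was: req->cmd_flags & REQ_FLUSH */
    	io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC);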
* diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index b2a2dff50b4e..e7437ec62710 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -204,7 +204,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } @@ -212,7 +212,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = alloc_page(GFP_KERNEL|__GFP_ZERO); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 98783dd0fa2e..ad8f206ab5e8 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -73,7 +73,7 @@ good_area: do { int fault; - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) goto out_nosemaphore; diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index e5602ee9c610..0769066929c6 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -80,7 +80,7 @@ config ARCH_PUV3 select CPU_UCV2 select GENERIC_CLOCKEVENTS select HAVE_CLK - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB # CONFIGs for ARCH_PUV3 diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h index 2e02d1356fdf..26775793c204 100644 --- a/arch/unicore32/include/asm/pgalloc.h +++ b/arch/unicore32/include/asm/pgalloc.h @@ -28,7 +28,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd); #define pgd_alloc(mm) get_pgd_slow(mm) #define pgd_free(mm, pgd) free_pgd_slow(mm, pgd) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) /* * Allocate one PTE table. diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index 2ec3d3adcefc..6c7f70bcaae3 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -194,7 +194,7 @@ good_area: * If for any reason at all we couldn't handle the fault, make * sure we exit gracefully rather than endlessly redo the fault. 
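On the recurring __GFP_REPEAT removals in these page-table allocators (sh, sparc, tile, um, unicore32): the flag only influences allocations above PAGE_ALLOC_COSTLY_ORDER, and every allocation touched here is order-0, where it was a no-op, so dropping it is behaviorally neutral. Typical result:

    /* order-0 allocation: __GFP_REPEAT never had an effect here */
    pte = alloc_page(GFP_KERNEL | __GFP_ZERO);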
*/ - fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); + fault = handle_mm_fault(vma, addr & PAGE_MASK, flags); return fault; check_stack: diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d9a94da0c29f..2fa55851d2a9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -22,6 +22,7 @@ config X86 select ANON_INODES select ARCH_CLOCKSOURCE_DATA select ARCH_DISCARD_MEMBLOCK + select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_DEVMEM_IS_ALLOWED @@ -49,7 +50,6 @@ config X86 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION if X86_32 - select ARCH_WANT_OPTIONAL_GPIOLIB select BUILDTIME_EXTABLE_SORT select CLKEVT_I8253 select CLKSRC_I8253 if X86_32 @@ -294,11 +294,6 @@ config X86_32_LAZY_GS def_bool y depends on X86_32 && !CC_STACKPROTECTOR -config ARCH_HWEIGHT_CFLAGS - string - default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 - default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 - config ARCH_SUPPORTS_UPROBES def_bool y @@ -643,7 +638,7 @@ config STA2X11 select X86_DMA_REMAP select SWIOTLB select MFD_STA2X11 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB default n ---help--- This adds support for boards based on the STA2X11 IO-Hub, @@ -1934,21 +1929,26 @@ config RANDOMIZE_BASE attempts relying on knowledge of the location of kernel code internals. - The kernel physical and virtual address can be randomized - from 16MB up to 1GB on 64-bit and 512MB on 32-bit. (Note that - using RANDOMIZE_BASE reduces the memory space available to - kernel modules from 1.5GB to 1GB.) + On 64-bit, the kernel physical and virtual addresses are + randomized separately. The physical address will be anywhere + between 16MB and the top of physical memory (up to 64TB). The + virtual address will be randomized from 16MB up to 1GB (9 bits + of entropy). Note that this also reduces the memory space + available to kernel modules from 1.5GB to 1GB. + + On 32-bit, the kernel physical and virtual addresses are + randomized together. They will be randomized from 16MB up to + 512MB (8 bits of entropy). Entropy is generated using the RDRAND instruction if it is supported. If RDTSC is supported, its value is mixed into the entropy pool as well. If neither RDRAND nor RDTSC are - supported, then entropy is read from the i8254 timer. - - Since the kernel is built using 2GB addressing, and - PHYSICAL_ALIGN must be at a minimum of 2MB, only 10 bits of - entropy is theoretically possible. Currently, with the - default value for PHYSICAL_ALIGN and due to page table - layouts, 64-bit uses 9 bits of entropy and 32-bit uses 8 bits. + supported, then entropy is read from the i8254 timer. The + usable entropy is limited by the kernel being built using + 2GB addressing, and that PHYSICAL_ALIGN must be at a + minimum of 2MB. As a result, only 10 bits of entropy are + theoretically possible, but the implementations are further + limited due to memory layouts. If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot time. To enable it, boot with "kaslr" on the kernel command @@ -1988,6 +1988,38 @@ config PHYSICAL_ALIGN Don't change this unless you know what you are doing. 
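The entropy figures in the RANDOMIZE_BASE help text above are just the randomization range divided by the alignment:

    /*
     * bits = log2(range / PHYSICAL_ALIGN), with the default 2MB alignment:
     *   64-bit virtual:  1 GB / 2 MB =  512 slots ->  9 bits
     *   32-bit:        512 MB / 2 MB =  256 slots ->  8 bits
     *   upper bound:     2 GB / 2 MB = 1024 slots -> 10 bits (2GB addressing)
     */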
+config RANDOMIZE_MEMORY + bool "Randomize the kernel memory sections" + depends on X86_64 + depends on RANDOMIZE_BASE + default RANDOMIZE_BASE + ---help--- + Randomizes the base virtual address of kernel memory sections + (physical memory mapping, vmalloc & vmemmap). This security feature + makes exploits relying on predictable memory locations less reliable. + + The order of allocations remains unchanged. Entropy is generated in + the same way as RANDOMIZE_BASE. The current implementation in the + optimal configuration has on average 30,000 different possible virtual + addresses for each memory section. + + If unsure, say N. + +config RANDOMIZE_MEMORY_PHYSICAL_PADDING + hex "Physical memory mapping padding" if EXPERT + depends on RANDOMIZE_MEMORY + default "0xa" if MEMORY_HOTPLUG + default "0x0" + range 0x1 0x40 if MEMORY_HOTPLUG + range 0x0 0x40 + ---help--- + Define the padding in terabytes added to the existing physical + memory size during kernel memory randomization. It is useful + for memory hotplug support but reduces the entropy available for + address randomization. + + If unsure, leave at the default value. + config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" depends on SMP diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 6fce7f096b88..830ed391e7ef 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -126,14 +126,6 @@ else KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args) endif -# Make sure compiler does not have buggy stack-protector support. -ifdef CONFIG_CC_STACKPROTECTOR - cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh - ifneq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y) - $(warning stack-protector enabled but compiler support broken) - endif -endif - ifdef CONFIG_X86_X32 x32_ld_ok := $(call try-run,\ /bin/echo -e '1: .quad 1b' | \ diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 878e4b9940d9..0d41d68131cc 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -16,14 +16,16 @@ #define BOOT_BITOPS_H #define _LINUX_BITOPS_H /* Inhibit inclusion of <asm/bitops.h> */ -static inline int constant_test_bit(int nr, const void *addr) +#include <linux/types.h> + +static inline bool constant_test_bit(int nr, const void *addr) { const u32 *p = (const u32 *)addr; return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0; } -static inline int variable_test_bit(int nr, const void *addr) +static inline bool variable_test_bit(int nr, const void *addr) { - u8 v; + bool v; const u32 *p = (const u32 *)addr; asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 9011a88353de..e5612f3e3b57 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -24,6 +24,7 @@ #include <linux/types.h> #include <linux/edd.h> #include <asm/setup.h> +#include <asm/asm.h> #include "bitops.h" #include "ctype.h" #include "cpuflags.h" @@ -176,18 +177,18 @@ static inline void wrgs32(u32 v, addr_t addr) } /* Note: these only return true/false, not a signed return value!
*/ -static inline int memcmp_fs(const void *s1, addr_t s2, size_t len) +static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len) { - u8 diff; - asm volatile("fs; repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + bool diff; + asm volatile("fs; repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } -static inline int memcmp_gs(const void *s1, addr_t s2, size_t len) +static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len) { - u8 diff; - asm volatile("gs; repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + bool diff; + asm volatile("gs; repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } @@ -294,6 +295,7 @@ static inline int cmdline_find_option_bool(const char *option) /* cpu.c, cpucheck.c */ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); +int check_knl_erratum(void); int validate_cpu(void); /* early_serial_console.c */ diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f1356889204e..536ccfcc01c6 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -85,7 +85,25 @@ vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ $(objtree)/drivers/firmware/efi/libstub/lib.a vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o +# The compressed kernel is built with -fPIC/-fPIE so that a boot loader +# can place it anywhere in memory and it will still run. However, since +# it is executed as-is without any ELF relocation processing performed +# (and has already had all relocation sections stripped from the binary), +# none of the code can use data relocations (e.g. static assignments of +# pointer values), since they will be meaningless at runtime. This check +# will refuse to link the vmlinux if any of these relocations are found.
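(A data relocation of the kind the check below rejects is easy to produce by accident. A minimal sketch, not taken from the tree:

	static int value;

	/*
	 * The address of 'value' has to be filled into .data by relocation
	 * processing. The compressed stub strips its relocation sections and
	 * never performs that processing, so this slot would hold garbage.
	 */
	static int *value_ptr = &value;

	/*
	 * Safe pattern for the stub: assign at run time, which compiles to a
	 * PC-relative address computation instead of a data relocation.
	 */
	static int *late_ptr;

	static void init_late_ptr(void)
	{
		late_ptr = &value;
	}

The check itself follows.)

+quiet_cmd_check_data_rel = DATAREL $@ +define cmd_check_data_rel + for obj in $(filter %.o,$^); do \ + readelf -S $$obj | grep -qF .rel.local && { \ + echo "error: $$obj has data relocations!"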
>&2; \ + exit 1; \ + } || true; \ + done +endef + $(obj)/vmlinux: $(vmlinux-objs-y) FORCE + $(call if_changed,check_data_rel) $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 52fef606bc54..ff574dad95cc 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -757,7 +757,6 @@ struct boot_params *make_boot_params(struct efi_config *c) struct boot_params *boot_params; struct apm_bios_info *bi; struct setup_header *hdr; - struct efi_info *efi; efi_loaded_image_t *image; void *options, *handle; efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; @@ -800,7 +799,6 @@ struct boot_params *make_boot_params(struct efi_config *c) memset(boot_params, 0x0, 0x4000); hdr = &boot_params->hdr; - efi = &boot_params->efi_info; bi = &boot_params->apm_bios_info; /* Copy the second sector to boot_params */ diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index cfeb0259ed81..a66854d99ee1 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -12,10 +12,6 @@ #include "misc.h" #include "error.h" -#include <asm/msr.h> -#include <asm/archrandom.h> -#include <asm/e820.h> - #include <generated/compile.h> #include <linux/module.h> #include <linux/uts.h> @@ -26,26 +22,6 @@ static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; -#define I8254_PORT_CONTROL 0x43 -#define I8254_PORT_COUNTER0 0x40 -#define I8254_CMD_READBACK 0xC0 -#define I8254_SELECT_COUNTER0 0x02 -#define I8254_STATUS_NOTREADY 0x40 -static inline u16 i8254(void) -{ - u16 status, timer; - - do { - outb(I8254_PORT_CONTROL, - I8254_CMD_READBACK | I8254_SELECT_COUNTER0); - status = inb(I8254_PORT_COUNTER0); - timer = inb(I8254_PORT_COUNTER0); - timer |= inb(I8254_PORT_COUNTER0) << 8; - } while (status & I8254_STATUS_NOTREADY); - - return timer; -} - static unsigned long rotate_xor(unsigned long hash, const void *area, size_t size) { @@ -62,7 +38,7 @@ static unsigned long rotate_xor(unsigned long hash, const void *area, } /* Attempt to create a simple but unpredictable starting entropy. */ -static unsigned long get_random_boot(void) +static unsigned long get_boot_seed(void) { unsigned long hash = 0; @@ -72,50 +48,8 @@ static unsigned long get_random_boot(void) return hash; } -static unsigned long get_random_long(const char *purpose) -{ -#ifdef CONFIG_X86_64 - const unsigned long mix_const = 0x5d6008cbf3848dd3UL; -#else - const unsigned long mix_const = 0x3f39e593UL; -#endif - unsigned long raw, random = get_random_boot(); - bool use_i8254 = true; - - debug_putstr(purpose); - debug_putstr(" KASLR using"); - - if (has_cpuflag(X86_FEATURE_RDRAND)) { - debug_putstr(" RDRAND"); - if (rdrand_long(&raw)) { - random ^= raw; - use_i8254 = false; - } - } - - if (has_cpuflag(X86_FEATURE_TSC)) { - debug_putstr(" RDTSC"); - raw = rdtsc(); - - random ^= raw; - use_i8254 = false; - } - - if (use_i8254) { - debug_putstr(" i8254"); - random ^= i8254(); - } - - /* Circular multiply for better bit diffusion */ - asm("mul %3" - : "=a" (random), "=d" (raw) - : "a" (random), "rm" (mix_const)); - random += raw; - - debug_putstr("...\n"); - - return random; -} +#define KASLR_COMPRESSED_BOOT +#include "../../lib/kaslr.c" struct mem_vector { unsigned long start; unsigned long size; }; enum mem_avoid_index { MEM_AVOID_ZO_RANGE = 0, MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, MEM_AVOID_BOOTPARAMS, MEM_AVOID_MAX, }; static struct mem_vector mem_avoid[MEM_AVOID_MAX];
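(The two added lines above compile the shared entropy code straight into the stub: lib/kaslr.c is built once for the kernel proper and, with KASLR_COMPRESSED_BOOT defined, textually included a second time for the compressed boot environment. A minimal sketch of that single-source pattern; the names and output helpers are invented for illustration and are not the contents of lib/kaslr.c:

	#include <stdio.h>

	/* Pretend build: cc -DKASLR_COMPRESSED_BOOT shared.c for the stub. */
	#ifdef KASLR_COMPRESSED_BOOT
	#define debug_out(s)	fputs("stub: " s, stderr)	/* stub console stand-in */
	#else
	#define debug_out(s)	fputs("kernel: " s, stderr)	/* kernel log stand-in */
	#endif

	unsigned long get_seed(void)
	{
		debug_out("mixing entropy sources...\n");
		return 0;	/* entropy gathering elided in this sketch */
	}

	int main(void)
	{
		return (int)get_seed();
	}

With the slot handling rewritten, the now-unused mem_contains() helper is also dropped:)

-static bool mem_contains(struct mem_vector *region, struct mem_vector *item) -{ - /* Item at least partially before region.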
*/ - if (item->start < region->start) - return false; - /* Item at least partially after region. */ - if (item->start + item->size > region->start + region->size) - return false; - return true; -} - static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) { /* Item one is entirely before item two. */ @@ -296,6 +219,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, if (mem_overlaps(img, &mem_avoid[i]) && mem_avoid[i].start < earliest) { *overlap = mem_avoid[i]; + earliest = overlap->start; is_overlapping = true; } } @@ -310,6 +234,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { *overlap = avoid; + earliest = overlap->start; is_overlapping = true; } @@ -319,8 +244,6 @@ static bool mem_avoid_overlap(struct mem_vector *img, return is_overlapping; } -static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN]; - struct slot_area { unsigned long addr; int num; @@ -351,36 +274,44 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size) } } -static void slots_append(unsigned long addr) -{ - /* Overflowing the slots list should be impossible. */ - if (slot_max >= KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN) - return; - - slots[slot_max++] = addr; -} - static unsigned long slots_fetch_random(void) { + unsigned long slot; + int i; + /* Handle case of no slots stored. */ if (slot_max == 0) return 0; - return slots[get_random_long("Physical") % slot_max]; + slot = kaslr_get_random_long("Physical") % slot_max; + + for (i = 0; i < slot_area_index; i++) { + if (slot >= slot_areas[i].num) { + slot -= slot_areas[i].num; + continue; + } + return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN; + } + + if (i == slot_area_index) + debug_putstr("slots_fetch_random() failed!?\n"); + return 0; } static void process_e820_entry(struct e820entry *entry, unsigned long minimum, unsigned long image_size) { - struct mem_vector region, img, overlap; + struct mem_vector region, overlap; + struct slot_area slot_area; + unsigned long start_orig; /* Skip non-RAM entries. */ if (entry->type != E820_RAM) return; - /* Ignore entries entirely above our maximum. */ - if (entry->addr >= KERNEL_IMAGE_SIZE) + /* On 32-bit, ignore entries entirely above our maximum. */ + if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE) return; /* Ignore entries entirely below our minimum. */ @@ -390,31 +321,55 @@ static void process_e820_entry(struct e820entry *entry, region.start = entry->addr; region.size = entry->size; - /* Potentially raise address to minimum location. */ - if (region.start < minimum) - region.start = minimum; + /* Give up if slot area array is full. */ + while (slot_area_index < MAX_SLOT_AREA) { + start_orig = region.start; - /* Potentially raise address to meet alignment requirements. */ - region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); + /* Potentially raise address to minimum location. */ + if (region.start < minimum) + region.start = minimum; - /* Did we raise the address above the bounds of this e820 region? */ - if (region.start > entry->addr + entry->size) - return; + /* Potentially raise address to meet alignment needs. */ + region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); - /* Reduce size by any delta from the original address. */ - region.size -= region.start - entry->addr; + /* Did we raise the address above this e820 region? 
*/ + if (region.start > entry->addr + entry->size) + return; - /* Reduce size by any delta from the original address. */ - region.size -= region.start - entry->addr; - /* Reduce maximum size to fit end of image within maximum limit. */ - if (region.start + region.size > KERNEL_IMAGE_SIZE) - region.size = KERNEL_IMAGE_SIZE - region.start; + /* Reduce size by any delta from the original address. */ + region.size -= region.start - start_orig; - /* Walk each aligned slot and check for avoided areas. */ - for (img.start = region.start, img.size = image_size ; - mem_contains(&region, &img) ; - img.start += CONFIG_PHYSICAL_ALIGN) { - if (mem_avoid_overlap(&img, &overlap)) - continue; - slots_append(img.start); + /* On 32-bit, reduce region size to fit within max size. */ + if (IS_ENABLED(CONFIG_X86_32) && + region.start + region.size > KERNEL_IMAGE_SIZE) + region.size = KERNEL_IMAGE_SIZE - region.start; + + /* Return if region can't contain decompressed kernel */ + if (region.size < image_size) + return; + + /* If nothing overlaps, store the region and return. */ + if (!mem_avoid_overlap(&region, &overlap)) { + store_slot_info(&region, image_size); + return; + } + + /* Store beginning of region if holds at least image_size. */ + if (overlap.start > region.start + image_size) { + struct mem_vector beginning; + + beginning.start = region.start; + beginning.size = overlap.start - region.start; + store_slot_info(&beginning, image_size); + } + + /* Return if overlap extends to or past end of region. */ + if (overlap.start + overlap.size >= region.start + region.size) + return; + + /* Clip off the overlapping region and start over. */ + region.size -= overlap.start - region.start + overlap.size; + region.start = overlap.start + overlap.size; } } @@ -431,6 +386,10 @@ static unsigned long find_random_phys_addr(unsigned long minimum, for (i = 0; i < boot_params->e820_entries; i++) { process_e820_entry(&boot_params->e820_map[i], minimum, image_size); + if (slot_area_index == MAX_SLOT_AREA) { + debug_putstr("Aborted e820 scan (slot_areas full)!\n"); + break; + } } return slots_fetch_random(); @@ -454,7 +413,7 @@ static unsigned long find_random_virt_addr(unsigned long minimum, slots = (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN + 1; - random_addr = get_random_long("Virtual") % slots; + random_addr = kaslr_get_random_long("Virtual") % slots; return random_addr * CONFIG_PHYSICAL_ALIGN + minimum; } @@ -463,48 +422,54 @@ static unsigned long find_random_virt_addr(unsigned long minimum, * Since this function examines addresses much more numerically, * it takes the input and output pointers as 'unsigned long'. */ -unsigned char *choose_random_location(unsigned long input, - unsigned long input_size, - unsigned long output, - unsigned long output_size) +void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr) { - unsigned long choice = output; - unsigned long random_addr; + unsigned long random_addr, min_addr; + + /* By default, keep output position unchanged. */ + *virt_addr = *output; -#ifdef CONFIG_HIBERNATION - if (!cmdline_find_option_bool("kaslr")) { - warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected)."); - goto out; - } -#else if (cmdline_find_option_bool("nokaslr")) { warn("KASLR disabled: 'nokaslr' on cmdline."); - goto out; + return; } -#endif boot_params->hdr.loadflags |= KASLR_FLAG; + /* Prepare to add new identity pagetables on demand. */ + initialize_identity_maps(); +
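(Worked through with concrete, invented numbers, the clipping loop in process_e820_entry() above behaves like this minimal standalone model, which uses a single avoided range in place of the kernel's mem_avoid[] ranges:

	#include <stdio.h>

	struct range { unsigned long start, size; };

	static int overlaps(struct range r, struct range a)
	{
		return r.start < a.start + a.size && a.start < r.start + r.size;
	}

	int main(void)
	{
		struct range region = { 16, 64 };	/* RAM [16M, 80M) */
		struct range avoid = { 40, 8 };		/* e.g. an initrd at [40M, 48M) */
		unsigned long image_size = 16;		/* kernel needs 16M */

		while (region.size >= image_size) {
			if (!overlaps(region, avoid)) {
				printf("slot area [%luM, %luM)\n", region.start,
				       region.start + region.size);
				break;
			}
			/* store the piece in front of the avoided range */
			if (avoid.start > region.start + image_size)
				printf("slot area [%luM, %luM)\n",
				       region.start, avoid.start);
			if (avoid.start + avoid.size >= region.start + region.size)
				break;
			/* clip off the overlap and start over */
			region.size -= avoid.start - region.start + avoid.size;
			region.start = avoid.start + avoid.size;
		}
		return 0;
	}

It prints slot areas [16M, 40M) and [48M, 80M), the two usable pieces either side of the avoided range, which is what store_slot_info() would record.)

/* Record the various known unsafe memory ranges.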
*/ - mem_avoid_init(input, input_size, output); + mem_avoid_init(input, input_size, *output); + + /* + * Low end of the randomization range should be the + * smaller of 512M or the initial kernel image + * location: + */ + min_addr = min(*output, 512UL << 20); /* Walk e820 and find a random address. */ - random_addr = find_random_phys_addr(output, output_size); + random_addr = find_random_phys_addr(min_addr, output_size); if (!random_addr) { warn("KASLR disabled: could not find suitable E820 region!"); - goto out; + } else { + /* Update the new physical address location. */ + if (*output != random_addr) { + add_identity_map(random_addr, output_size); + *output = random_addr; + } } - /* Always enforce the minimum. */ - if (random_addr < choice) - goto out; - - choice = random_addr; - - add_identity_map(choice, output_size); - /* This actually loads the identity pagetable on x86_64. */ finalize_identity_maps(); -out: - return (unsigned char *)choice; + + /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */ + if (IS_ENABLED(CONFIG_X86_64)) + random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size); + *virt_addr = random_addr; } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index f14db4e21654..b3c5a5f030ce 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -170,7 +170,8 @@ void __puthex(unsigned long value) } #if CONFIG_X86_NEED_RELOCS -static void handle_relocations(void *output, unsigned long output_len) +static void handle_relocations(void *output, unsigned long output_len, + unsigned long virt_addr) { int *reloc; unsigned long delta, map, ptr; @@ -182,11 +183,6 @@ static void handle_relocations(void *output, unsigned long output_len) * and where it was actually loaded. */ delta = min_addr - LOAD_PHYSICAL_ADDR; - if (!delta) { - debug_putstr("No relocation needed... "); - return; - } - debug_putstr("Performing relocations... "); /* * The kernel contains a table of relocation addresses. Those @@ -197,6 +193,20 @@ static void handle_relocations(void *output, unsigned long output_len) */ map = delta - __START_KERNEL_map; + /* + * 32-bit always performs relocations. 64-bit relocations are only + * needed if KASLR has chosen a different starting address offset + * from __START_KERNEL_map. + */ + if (IS_ENABLED(CONFIG_X86_64)) + delta = virt_addr - LOAD_PHYSICAL_ADDR; + + if (!delta) { + debug_putstr("No relocation needed... "); + return; + } + debug_putstr("Performing relocations... "); + /* * Process relocations: 32 bit relocations first then 64 bit after. * Three sets of binary relocations are added to the end of the kernel @@ -250,7 +260,8 @@ static void handle_relocations(void *output, unsigned long output_len) #endif } #else -static inline void handle_relocations(void *output, unsigned long output_len) +static inline void handle_relocations(void *output, unsigned long output_len, + unsigned long virt_addr) { } #endif @@ -327,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, unsigned long output_len) { const unsigned long kernel_total_size = VO__end - VO__text; - unsigned char *output_orig = output; + unsigned long virt_addr = (unsigned long)output; /* Retain x86 boot parameters pointer passed from startup_32/64. */ boot_params = rmode; @@ -366,13 +377,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * the entire decompressed kernel plus relocation table, or the * entire decompressed kernel plus .bss and .brk sections. 
*/ - output = choose_random_location((unsigned long)input_data, input_len, - (unsigned long)output, - max(output_len, kernel_total_size)); + choose_random_location((unsigned long)input_data, input_len, + (unsigned long *)&output, + max(output_len, kernel_total_size), + &virt_addr); /* Validate memory location choices. */ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) - error("Destination address inappropriately aligned"); + error("Destination physical address inappropriately aligned"); + if (virt_addr & (MIN_KERNEL_ALIGN - 1)) + error("Destination virtual address inappropriately aligned"); #ifdef CONFIG_X86_64 if (heap > 0x3fffffffffffUL) error("Destination address too large"); @@ -382,19 +396,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #endif #ifndef CONFIG_RELOCATABLE if ((unsigned long)output != LOAD_PHYSICAL_ADDR) - error("Wrong destination address"); + error("Destination address does not match LOAD_PHYSICAL_ADDR"); + if ((unsigned long)output != virt_addr) + error("Destination virtual address changed when not relocatable"); #endif debug_putstr("\nDecompressing Linux... "); __decompress(input_data, input_len, NULL, NULL, output, output_len, NULL, error); parse_elf(output); - /* - * 32-bit always performs relocations. 64-bit relocations are only - * needed if kASLR has chosen a different load address. - */ - if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig) - handle_relocations(output, output_len); + handle_relocations(output, output_len, virt_addr); debug_putstr("done.\nBooting the kernel.\n"); return output; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index b6fec1ff10e4..1c8355eadbd1 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -67,28 +67,33 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE /* kaslr.c */ -unsigned char *choose_random_location(unsigned long input_ptr, - unsigned long input_size, - unsigned long output_ptr, - unsigned long output_size); +void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr); /* cpuflags.c */ bool has_cpuflag(int flag); #else -static inline -unsigned char *choose_random_location(unsigned long input_ptr, - unsigned long input_size, - unsigned long output_ptr, - unsigned long output_size) +static inline void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr) { - return (unsigned char *)output_ptr; + /* No change from existing output location. */ + *virt_addr = *output; } #endif #ifdef CONFIG_X86_64 +void initialize_identity_maps(void); void add_identity_map(unsigned long start, unsigned long size); void finalize_identity_maps(void); extern unsigned char _pgtable[]; #else +static inline void initialize_identity_maps(void) +{ } static inline void add_identity_map(unsigned long start, unsigned long size) { } static inline void finalize_identity_maps(void) diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 34b95df14e69..56589d0a804b 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -2,6 +2,9 @@ * This code is used on x86_64 to create page table identity mappings on * demand by building up a new set of page tables (or appending to the * existing ones), and then switching over to them when ready. 
+ * + * Copyright (C) 2015-2016 Yinghai Lu + * Copyright (C) 2016 Kees Cook */ /* @@ -17,6 +20,9 @@ /* These actually do the work of building the kernel identity maps. */ #include <asm/init.h> #include <asm/pgtable.h> +/* Use the static base for this part of the boot process */ +#undef __PAGE_OFFSET +#define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" /* Used by pgtable.h asm code to force instruction serialization. */ @@ -59,9 +65,21 @@ static struct alloc_pgt_data pgt_data; /* The top level page table entry pointer. */ static unsigned long level4p; +/* + * Mapping information structure passed to kernel_ident_mapping_init(). + * Due to relocation, pointers must be assigned at run time not build time. + */ +static struct x86_mapping_info mapping_info = { + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, +}; + /* Locates and clears a region for a new top level page table. */ -static void prepare_level4(void) +void initialize_identity_maps(void) { + /* Init mapping_info with run-time function/buffer pointers. */ + mapping_info.alloc_pgt_page = alloc_pgt_page; + mapping_info.context = &pgt_data; + /* * It should be impossible for this not to already be true, * but since calling this a second time would rewind the other @@ -96,17 +114,8 @@ */ void add_identity_map(unsigned long start, unsigned long size) { - struct x86_mapping_info mapping_info = { - .alloc_pgt_page = alloc_pgt_page, - .context = &pgt_data, - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, - }; unsigned long end = start + size; - /* Make sure we have a top level page table ready to use. */ - if (!level4p) - prepare_level4(); - /* Align boundary to 2M. */ start = round_down(start, PMD_SIZE); end = round_up(end, PMD_SIZE); diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 29207f69ae8c..26240dde081e 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -93,6 +93,8 @@ int validate_cpu(void) show_cap_strs(err_flags); putchar('\n'); return -1; + } else if (check_knl_erratum()) { + return -1; } else { return 0; } diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 1fd7d575092e..4ad7d70e8739 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -24,6 +24,7 @@ # include "boot.h" #endif #include <linux/types.h> +#include <asm/intel-family.h> #include <asm/processor-flags.h> #include <asm/required-features.h> #include <asm/msr-index.h> @@ -175,6 +176,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n"); } } + if (!err) + err = check_knl_erratum(); if (err_flags_ptr) *err_flags_ptr = err ? err_flags : NULL; @@ -185,3 +188,33 @@ return (cpu.level < req_level || err) ? -1 : 0; } + +int check_knl_erratum(void) +{ + /* + * First check for the affected model/family: + */ + if (!is_intel() || + cpu.family != 6 || + cpu.model != INTEL_FAM6_XEON_PHI_KNL) + return 0; + + /* + * This erratum affects the Accessed/Dirty bits, and can + * cause stray bits to be set in !Present PTEs. We have + * enough bits in our 64-bit PTEs (which we have on real + * 64-bit mode or PAE) to avoid using these troublesome + * bits. But, we do not have enough space in our 32-bit + * PTEs. So, refuse to run on 32-bit non-PAE kernels. + */ + if (IS_ENABLED(CONFIG_X86_64) || IS_ENABLED(CONFIG_X86_PAE)) + return 0; +
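(check_knl_erratum() keys off the family/model signature decoded from CPUID leaf 1; the cpuflags.c hunk below is what starts capturing cpu.family for it. A rough standalone sketch of that decoding, mirroring the shifts used by the boot code, with 0x57 being the model number behind INTEL_FAM6_XEON_PHI_KNL:

	/* Decode family/model from CPUID(1) EAX the way cpuflags.c does. */
	static void decode_signature(unsigned int eax, int *family, int *model)
	{
		*family = (eax >> 8) & 0xf;	/* base family */
		*model = (eax >> 4) & 0xf;	/* base model */
		if (*family >= 6)		/* extended model bits apply */
			*model += ((eax >> 16) & 0xf) << 4;
	}

	/* Knights Landing reports family 6, model 0x57. */

On an affected configuration the check then prints its refusal and aborts:)

+ puts("This 32-bit kernel can not run on this Xeon Phi x200\n" "processor due to a processor erratum.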
Use a 64-bit\n" + "kernel, or enable PAE in this 32-bit kernel.\n\n"); + + return -1; +} + + diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c index 431fa5f84537..6687ab953257 100644 --- a/arch/x86/boot/cpuflags.c +++ b/arch/x86/boot/cpuflags.c @@ -102,6 +102,7 @@ void get_cpuflags(void) cpuid(0x1, &tfms, &ignored, &cpu.flags[4], &cpu.flags[0]); cpu.level = (tfms >> 8) & 15; + cpu.family = cpu.level; cpu.model = (tfms >> 4) & 15; if (cpu.level >= 6) cpu.model += ((tfms >> 16) & 0xf) << 4; diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h index 4cb404fd45ce..15ad56a3f905 100644 --- a/arch/x86/boot/cpuflags.h +++ b/arch/x86/boot/cpuflags.h @@ -6,6 +6,7 @@ struct cpu_features { int level; /* Family, or 64 for x86-64 */ + int family; /* Family, always */ int model; u32 flags[NCAPINTS]; }; diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 318b8465d302..cc3bd583dce1 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -17,7 +17,7 @@ int memcmp(const void *s1, const void *s2, size_t len) { - u8 diff; + bool diff; asm("repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index b9b912a44d61..34b3fa2889d1 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -49,7 +49,9 @@ endif ifeq ($(avx2_supported),yes) obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o - obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/ + obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/ + obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/ + obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/ endif aes-i586-y := aes-i586-asm_32.o aes_glue.o diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 5b7fa1471007..0ab5ee1c26af 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -59,17 +59,6 @@ struct aesni_rfc4106_gcm_ctx { u8 nonce[4]; }; -struct aesni_gcm_set_hash_subkey_result { - int err; - struct completion completion; -}; - -struct aesni_hash_subkey_req_data { - u8 iv[16]; - struct aesni_gcm_set_hash_subkey_result result; - struct scatterlist sg; -}; - struct aesni_lrw_ctx { struct lrw_table_ctx lrw_table; u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; @@ -809,71 +798,28 @@ static void rfc4106_exit(struct crypto_aead *aead) cryptd_free_aead(*ctx); } -static void -rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err) -{ - struct aesni_gcm_set_hash_subkey_result *result = req->data; - - if (err == -EINPROGRESS) - return; - result->err = err; - complete(&result->completion); -} - static int rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) { - struct crypto_ablkcipher *ctr_tfm; - struct ablkcipher_request *req; - int ret = -EINVAL; - struct aesni_hash_subkey_req_data *req_data; + struct crypto_cipher *tfm; + int ret; - ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0); - if (IS_ERR(ctr_tfm)) - return PTR_ERR(ctr_tfm); + tfm = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); - ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len); + ret = crypto_cipher_setkey(tfm, key, key_len); if (ret) - goto out_free_ablkcipher; - - ret = -ENOMEM; - req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL); - if (!req) - goto out_free_ablkcipher; - - req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); - if (!req_data) - goto out_free_request; - - 
memset(req_data->iv, 0, sizeof(req_data->iv)); + goto out_free_cipher; /* Clear the data in the hash sub key container to zero.*/ /* We want to cipher all zeros to create the hash sub key. */ memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); - init_completion(&req_data->result.completion); - sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE); - ablkcipher_request_set_tfm(req, ctr_tfm); - ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | - CRYPTO_TFM_REQ_MAY_BACKLOG, - rfc4106_set_hash_subkey_done, - &req_data->result); - - ablkcipher_request_set_crypt(req, &req_data->sg, - &req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv); - - ret = crypto_ablkcipher_encrypt(req); - if (ret == -EINPROGRESS || ret == -EBUSY) { - ret = wait_for_completion_interruptible - (&req_data->result.completion); - if (!ret) - ret = req_data->result.err; - } - kfree(req_data); -out_free_request: - ablkcipher_request_free(req); -out_free_ablkcipher: - crypto_free_ablkcipher(ctr_tfm); + crypto_cipher_encrypt_one(tfm, hash_subkey, hash_subkey); + +out_free_cipher: + crypto_free_cipher(tfm); return ret; } @@ -1098,9 +1044,12 @@ static int rfc4106_encrypt(struct aead_request *req) struct cryptd_aead **ctx = crypto_aead_ctx(tfm); struct cryptd_aead *cryptd_tfm = *ctx; - aead_request_set_tfm(req, irq_fpu_usable() ? - cryptd_aead_child(cryptd_tfm) : - &cryptd_tfm->base); + tfm = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + tfm = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, tfm); return crypto_aead_encrypt(req); } @@ -1111,9 +1060,12 @@ static int rfc4106_decrypt(struct aead_request *req) struct cryptd_aead **ctx = crypto_aead_ctx(tfm); struct cryptd_aead *cryptd_tfm = *ctx; - aead_request_set_tfm(req, irq_fpu_usable() ? 
- cryptd_aead_child(cryptd_tfm) : - &cryptd_tfm->base); + tfm = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + tfm = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, tfm); return crypto_aead_decrypt(req); } diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index 2d5c2e0bd939..f910d1d449f0 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -70,7 +70,7 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, struct blkcipher_walk walk; int err; - if (!may_use_simd()) + if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd()) return crypto_chacha20_crypt(desc, dst, src, nbytes); state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index a69321a77783..0420bab19efb 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -168,30 +168,23 @@ static int ghash_async_init(struct ahash_request *req) struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); - if (!irq_fpu_usable()) { - memcpy(cryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); - return crypto_ahash_init(cryptd_req); - } else { - struct shash_desc *desc = cryptd_shash_desc(cryptd_req); - struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); - - desc->tfm = child; - desc->flags = req->base.flags; - return crypto_shash_init(desc); - } + desc->tfm = child; + desc->flags = req->base.flags; + return crypto_shash_init(desc); } static int ghash_async_update(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable()) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - + if (!irq_fpu_usable() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_update(cryptd_req); @@ -204,12 +197,12 @@ static int ghash_async_update(struct ahash_request *req) static int ghash_async_final(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable()) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - + if (!irq_fpu_usable() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_final(cryptd_req); @@ -249,7 +242,8 @@ static int ghash_async_digest(struct ahash_request *req) struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable()) { + if 
(!irq_fpu_usable() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_digest(cryptd_req); diff --git a/arch/x86/crypto/sha-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile similarity index 100% rename from arch/x86/crypto/sha-mb/Makefile rename to arch/x86/crypto/sha1-mb/Makefile diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c similarity index 81% rename from arch/x86/crypto/sha-mb/sha1_mb.c rename to arch/x86/crypto/sha1-mb/sha1_mb.c index 9c5af331a956..9e5b67127a09 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha1-mb/sha1_mb.c @@ -67,7 +67,7 @@ #include #include #include -#include "sha_mb_ctx.h" +#include "sha1_mb_ctx.h" #define FLUSH_INTERVAL 1000 /* in usec */ @@ -77,30 +77,34 @@ struct sha1_mb_ctx { struct mcryptd_ahash *mcryptd_tfm; }; -static inline struct mcryptd_hash_request_ctx *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx) +static inline struct mcryptd_hash_request_ctx + *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx) { - struct shash_desc *desc; + struct ahash_request *areq; - desc = container_of((void *) hash_ctx, struct shash_desc, __ctx); - return container_of(desc, struct mcryptd_hash_request_ctx, desc); + areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); + return container_of(areq, struct mcryptd_hash_request_ctx, areq); } -static inline struct ahash_request *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) +static inline struct ahash_request + *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) { return container_of((void *) ctx, struct ahash_request, __ctx); } static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, - struct shash_desc *desc) + struct ahash_request *areq) { rctx->flag = HASH_UPDATE; } static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state); -static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)(struct sha1_mb_mgr *state, - struct job_sha1 *job); -static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)(struct sha1_mb_mgr *state); -static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)(struct sha1_mb_mgr *state); +static asmlinkage struct job_sha1* (*sha1_job_mgr_submit) + (struct sha1_mb_mgr *state, struct job_sha1 *job); +static asmlinkage struct job_sha1* (*sha1_job_mgr_flush) + (struct sha1_mb_mgr *state); +static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job) + (struct sha1_mb_mgr *state); static inline void sha1_init_digest(uint32_t *digest) { @@ -131,7 +135,8 @@ static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], return i >> SHA1_LOG2_BLOCK_SIZE; } -static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, struct sha1_hash_ctx *ctx) +static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, + struct sha1_hash_ctx *ctx) { while (ctx) { if (ctx->status & HASH_CTX_STS_COMPLETE) { @@ -177,8 +182,8 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, str ctx->job.buffer = (uint8_t *) buffer; ctx->job.len = len; - ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, - &ctx->job); + ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr, + &ctx->job); continue; } } @@ -191,13 +196,15 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, str if (ctx->status & HASH_CTX_STS_LAST) { uint8_t *buf = ctx->partial_block_buffer; - uint32_t n_extra_blocks = 
sha1_pad(buf, ctx->total_length); + uint32_t n_extra_blocks = + sha1_pad(buf, ctx->total_length); ctx->status = (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_COMPLETE); ctx->job.buffer = buf; ctx->job.len = (uint32_t) n_extra_blocks; - ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, &ctx->job); + ctx = (struct sha1_hash_ctx *) + sha1_job_mgr_submit(&mgr->mgr, &ctx->job); continue; } @@ -208,14 +215,17 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, str return NULL; } -static struct sha1_hash_ctx *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr) +static struct sha1_hash_ctx + *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr) { /* * If get_comp_job returns NULL, there are no jobs complete. - * If get_comp_job returns a job, verify that it is safe to return to the user. + * If get_comp_job returns a job, verify that it is safe to return to + * the user. * If it is not ready, resubmit the job to finish processing. * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. - * Otherwise, all jobs currently being managed by the hash_ctx_mgr still need processing. + * Otherwise, all jobs currently being managed by the hash_ctx_mgr + * still need processing. */ struct sha1_hash_ctx *ctx; @@ -235,7 +245,10 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, int flags) { if (flags & (~HASH_ENTIRE)) { - /* User should not pass anything other than FIRST, UPDATE, or LAST */ + /* + * User should not pass anything other than FIRST, UPDATE, or + * LAST + */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; return ctx; } @@ -264,14 +277,20 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, ctx->partial_block_buffer_length = 0; } - /* If we made it here, there were no errors during this call to submit */ + /* + * If we made it here, there were no errors during this call to + * submit + */ ctx->error = HASH_CTX_ERROR_NONE; /* Store buffer ptr info from user */ ctx->incoming_buffer = buffer; ctx->incoming_buffer_length = len; - /* Store the user's request flags and mark this ctx as currently being processed. */ + /* + * Store the user's request flags and mark this ctx as currently + * being processed. + */ ctx->status = (flags & HASH_LAST) ? (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : HASH_CTX_STS_PROCESSING; @@ -285,9 +304,13 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, * Or if the user's buffer contains less than a whole block, * append as much as possible to the extra block. 
*/ - if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) { - /* Compute how many bytes to copy from user buffer into extra block */ - uint32_t copy_len = SHA1_BLOCK_SIZE - ctx->partial_block_buffer_length; + if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) { + /* + * Compute how many bytes to copy from user buffer into + * extra block + */ + uint32_t copy_len = SHA1_BLOCK_SIZE - + ctx->partial_block_buffer_length; if (len < copy_len) copy_len = len; @@ -297,20 +320,28 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, buffer, copy_len); ctx->partial_block_buffer_length += copy_len; - ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len); + ctx->incoming_buffer = (const void *) + ((const char *)buffer + copy_len); ctx->incoming_buffer_length = len - copy_len; } - /* The extra block should never contain more than 1 block here */ + /* + * The extra block should never contain more than 1 block + * here + */ assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE); - /* If the extra block buffer contains exactly 1 block, it can be hashed. */ + /* + * If the extra block buffer contains exactly 1 block, it can + * be hashed. + */ if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) { ctx->partial_block_buffer_length = 0; ctx->job.buffer = ctx->partial_block_buffer; ctx->job.len = 1; - ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, &ctx->job); + ctx = (struct sha1_hash_ctx *) + sha1_job_mgr_submit(&mgr->mgr, &ctx->job); } } @@ -329,23 +360,24 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr) return NULL; /* - * If flush returned a job, resubmit the job to finish processing. + * If flush returned a job, resubmit the job to finish + * processing. */ ctx = sha1_ctx_mgr_resubmit(mgr, ctx); /* - * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. - * Otherwise, all jobs currently being managed by the sha1_ctx_mgr - * still need processing. Loop. + * If sha1_ctx_mgr_resubmit returned a job, it is ready to be + * returned. Otherwise, all jobs currently being managed by the + * sha1_ctx_mgr still need processing. Loop. 
*/ if (ctx) return ctx; } } -static int sha1_mb_init(struct shash_desc *desc) +static int sha1_mb_init(struct ahash_request *areq) { - struct sha1_hash_ctx *sctx = shash_desc_ctx(desc); + struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); hash_ctx_init(sctx); sctx->job.result_digest[0] = SHA1_H0; @@ -363,7 +395,7 @@ static int sha1_mb_init(struct shash_desc *desc) static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx) { int i; - struct sha1_hash_ctx *sctx = shash_desc_ctx(&rctx->desc); + struct sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); __be32 *dst = (__be32 *) rctx->out; for (i = 0; i < 5; ++i) @@ -394,9 +426,11 @@ static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, flag |= HASH_LAST; } - sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(&rctx->desc); + sha_ctx = (struct sha1_hash_ctx *) + ahash_request_ctx(&rctx->areq); kernel_fpu_begin(); - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, + rctx->walk.data, nbytes, flag); if (!sha_ctx) { if (flush) sha_ctx = sha1_ctx_mgr_flush(cstate->mgr); @@ -485,11 +519,10 @@ static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx, mcryptd_arm_flusher(cstate, delay); } -static int sha1_mb_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha1_mb_update(struct ahash_request *areq) { struct mcryptd_hash_request_ctx *rctx = - container_of(desc, struct mcryptd_hash_request_ctx, desc); + container_of(areq, struct mcryptd_hash_request_ctx, areq); struct mcryptd_alg_cstate *cstate = this_cpu_ptr(sha1_mb_alg_state.alg_cstate); @@ -505,7 +538,7 @@ static int sha1_mb_update(struct shash_desc *desc, const u8 *data, } /* need to init context */ - req_ctx_init(rctx, desc); + req_ctx_init(rctx, areq); nbytes = crypto_ahash_walk_first(req, &rctx->walk); @@ -518,10 +551,11 @@ static int sha1_mb_update(struct shash_desc *desc, const u8 *data, rctx->flag |= HASH_DONE; /* submit */ - sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); + sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); sha1_mb_add_list(rctx, cstate); kernel_fpu_begin(); - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, HASH_UPDATE); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + nbytes, HASH_UPDATE); kernel_fpu_end(); /* check if anything is returned */ @@ -544,11 +578,10 @@ done: return ret; } -static int sha1_mb_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int sha1_mb_finup(struct ahash_request *areq) { struct mcryptd_hash_request_ctx *rctx = - container_of(desc, struct mcryptd_hash_request_ctx, desc); + container_of(areq, struct mcryptd_hash_request_ctx, areq); struct mcryptd_alg_cstate *cstate = this_cpu_ptr(sha1_mb_alg_state.alg_cstate); @@ -563,7 +596,7 @@ static int sha1_mb_finup(struct shash_desc *desc, const u8 *data, } /* need to init context */ - req_ctx_init(rctx, desc); + req_ctx_init(rctx, areq); nbytes = crypto_ahash_walk_first(req, &rctx->walk); @@ -576,15 +609,15 @@ static int sha1_mb_finup(struct shash_desc *desc, const u8 *data, rctx->flag |= HASH_DONE; flag = HASH_LAST; } - rctx->out = out; /* submit */ rctx->flag |= HASH_FINAL; - sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); + sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); sha1_mb_add_list(rctx, cstate); kernel_fpu_begin(); - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag); + sha_ctx = 
sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + nbytes, flag); kernel_fpu_end(); /* check if anything is returned */ @@ -605,10 +638,10 @@ done: return ret; } -static int sha1_mb_final(struct shash_desc *desc, u8 *out) +static int sha1_mb_final(struct ahash_request *areq) { struct mcryptd_hash_request_ctx *rctx = - container_of(desc, struct mcryptd_hash_request_ctx, desc); + container_of(areq, struct mcryptd_hash_request_ctx, areq); struct mcryptd_alg_cstate *cstate = this_cpu_ptr(sha1_mb_alg_state.alg_cstate); @@ -623,16 +656,16 @@ static int sha1_mb_final(struct shash_desc *desc, u8 *out) } /* need to init context */ - req_ctx_init(rctx, desc); + req_ctx_init(rctx, areq); - rctx->out = out; rctx->flag |= HASH_DONE | HASH_FINAL; - sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); + sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); /* flag HASH_FINAL and 0 data size */ sha1_mb_add_list(rctx, cstate); kernel_fpu_begin(); - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, HASH_LAST); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, + HASH_LAST); kernel_fpu_end(); /* check if anything is returned */ @@ -654,48 +687,98 @@ done: return ret; } -static int sha1_mb_export(struct shash_desc *desc, void *out) +static int sha1_mb_export(struct ahash_request *areq, void *out) { - struct sha1_hash_ctx *sctx = shash_desc_ctx(desc); + struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); memcpy(out, sctx, sizeof(*sctx)); return 0; } -static int sha1_mb_import(struct shash_desc *desc, const void *in) +static int sha1_mb_import(struct ahash_request *areq, const void *in) { - struct sha1_hash_ctx *sctx = shash_desc_ctx(desc); + struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); memcpy(sctx, in, sizeof(*sctx)); return 0; } +static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) +{ + struct mcryptd_ahash *mcryptd_tfm; + struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); + struct mcryptd_hash_ctx *mctx; -static struct shash_alg sha1_mb_shash_alg = { - .digestsize = SHA1_DIGEST_SIZE, + mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", + CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(mcryptd_tfm)) + return PTR_ERR(mcryptd_tfm); + mctx = crypto_ahash_ctx(&mcryptd_tfm->base); + mctx->alg_state = &sha1_mb_alg_state; + ctx->mcryptd_tfm = mcryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&mcryptd_tfm->base)); + + return 0; +} + +static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm) +{ + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + sizeof(struct sha1_hash_ctx)); + + return 0; +} + +static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static struct ahash_alg sha1_mb_areq_alg = { .init = sha1_mb_init, .update = sha1_mb_update, .final = sha1_mb_final, .finup = sha1_mb_finup, .export = sha1_mb_export, .import = sha1_mb_import, - .descsize = sizeof(struct sha1_hash_ctx), - .statesize = sizeof(struct sha1_hash_ctx), - .base = { - .cra_name = "__sha1-mb", - .cra_driver_name = "__intel_sha1-mb", - .cra_priority = 100, - /* - * use ASYNC flag as some buffers in multi-buffer - * algo may not have completed before hashing thread sleep - */ - .cra_flags = CRYPTO_ALG_TYPE_SHASH | 
CRYPTO_ALG_ASYNC | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list), + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_hash_ctx), + .base = { + .cra_name = "__sha1-mb", + .cra_driver_name = "__intel_sha1-mb", + .cra_priority = 100, + /* + * use ASYNC flag as some buffers in multi-buffer + * algo may not have completed before hashing thread + * sleep + */ + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_INTERNAL, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT + (sha1_mb_areq_alg.halg.base.cra_list), + .cra_init = sha1_mb_areq_init_tfm, + .cra_exit = sha1_mb_areq_exit_tfm, + .cra_ctxsize = sizeof(struct sha1_hash_ctx), + } } }; @@ -780,46 +863,20 @@ static int sha1_mb_async_import(struct ahash_request *req, const void *in) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - struct crypto_shash *child = mcryptd_ahash_child(mcryptd_tfm); + struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); struct mcryptd_hash_request_ctx *rctx; - struct shash_desc *desc; + struct ahash_request *areq; memcpy(mcryptd_req, req, sizeof(*req)); ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); rctx = ahash_request_ctx(mcryptd_req); - desc = &rctx->desc; - desc->tfm = child; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - - return crypto_ahash_import(mcryptd_req, in); -} - -static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) -{ - struct mcryptd_ahash *mcryptd_tfm; - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); - struct mcryptd_hash_ctx *mctx; + areq = &rctx->areq; - mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(mcryptd_tfm)) - return PTR_ERR(mcryptd_tfm); - mctx = crypto_ahash_ctx(&mcryptd_tfm->base); - mctx->alg_state = &sha1_mb_alg_state; - ctx->mcryptd_tfm = mcryptd_tfm; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_request) + - crypto_ahash_reqsize(&mcryptd_tfm->base)); + ahash_request_set_tfm(areq, child); + ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, + rctx->complete, req); - return 0; -} - -static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm) -{ - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); - - mcryptd_free_ahash(ctx->mcryptd_tfm); + return crypto_ahash_import(mcryptd_req, in); } static struct ahash_alg sha1_mb_async_alg = { @@ -866,7 +923,8 @@ static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate) if (time_before(cur_time, rctx->tag.expire)) break; kernel_fpu_begin(); - sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr); + sha_ctx = (struct sha1_hash_ctx *) + sha1_ctx_mgr_flush(cstate->mgr); kernel_fpu_end(); if (!sha_ctx) { pr_err("sha1_mb error: nothing got flushed for non-empty list\n"); @@ -927,7 +985,7 @@ static int __init sha1_mb_mod_init(void) } sha1_mb_alg_state.flusher = &sha1_mb_flusher; - err = crypto_register_shash(&sha1_mb_shash_alg); + err = crypto_register_ahash(&sha1_mb_areq_alg); if (err) goto err2; err = crypto_register_ahash(&sha1_mb_async_alg); @@ -937,7 +995,7 @@ static int __init sha1_mb_mod_init(void) return 0; err1: - crypto_unregister_shash(&sha1_mb_shash_alg); + crypto_unregister_ahash(&sha1_mb_areq_alg); err2: for_each_possible_cpu(cpu) { cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); @@ -953,7 
+1011,7 @@ static void __exit sha1_mb_mod_fini(void) struct mcryptd_alg_cstate *cpu_state; crypto_unregister_ahash(&sha1_mb_async_alg); - crypto_unregister_shash(&sha1_mb_shash_alg); + crypto_unregister_ahash(&sha1_mb_areq_alg); for_each_possible_cpu(cpu) { cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); kfree(cpu_state->mgr); diff --git a/arch/x86/crypto/sha-mb/sha_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h similarity index 99% rename from arch/x86/crypto/sha-mb/sha_mb_ctx.h rename to arch/x86/crypto/sha1-mb/sha1_mb_ctx.h index e36069d0c1bd..98a35bcc6f4a 100644 --- a/arch/x86/crypto/sha-mb/sha_mb_ctx.h +++ b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h @@ -54,7 +54,7 @@ #ifndef _SHA_MB_CTX_INTERNAL_H #define _SHA_MB_CTX_INTERNAL_H -#include "sha_mb_mgr.h" +#include "sha1_mb_mgr.h" #define HASH_UPDATE 0x00 #define HASH_FIRST 0x01 diff --git a/arch/x86/crypto/sha-mb/sha_mb_mgr.h b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h similarity index 100% rename from arch/x86/crypto/sha-mb/sha_mb_mgr.h rename to arch/x86/crypto/sha1-mb/sha1_mb_mgr.h diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S similarity index 100% rename from arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S rename to arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S similarity index 100% rename from arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S rename to arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c similarity index 99% rename from arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c rename to arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c index 822acb5b464c..d2add0d35f43 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c +++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c @@ -51,7 +51,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "sha_mb_mgr.h" +#include "sha1_mb_mgr.h" void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) { diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S similarity index 100% rename from arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S rename to arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S diff --git a/arch/x86/crypto/sha-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S similarity index 100% rename from arch/x86/crypto/sha-mb/sha1_x8_avx2.S rename to arch/x86/crypto/sha1-mb/sha1_x8_avx2.S diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 1024e378a358..fc61739150e7 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -374,3 +374,9 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS_CRYPTO("sha1"); +MODULE_ALIAS_CRYPTO("sha1-ssse3"); +MODULE_ALIAS_CRYPTO("sha1-avx"); +MODULE_ALIAS_CRYPTO("sha1-avx2"); +#ifdef CONFIG_AS_SHA1_NI +MODULE_ALIAS_CRYPTO("sha1-ni"); +#endif diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile new file mode 100644 index 000000000000..41089e7c400c --- /dev/null +++ b/arch/x86/crypto/sha256-mb/Makefile @@ -0,0 +1,11 @@ +# +# Arch-specific CryptoAPI modules. 
+# + +avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ + $(comma)4)$(comma)%ymm2,yes,no) +ifeq ($(avx2_supported),yes) + obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o + sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \ + sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o +endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c new file mode 100644 index 000000000000..89fa85e8b10c --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb.c @@ -0,0 +1,1030 @@ +/* + * Multi buffer SHA256 algorithm Glue Code + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sha256_mb_ctx.h" + +#define FLUSH_INTERVAL 1000 /* in usec */ + +static struct mcryptd_alg_state sha256_mb_alg_state; + +struct sha256_mb_ctx { + struct mcryptd_ahash *mcryptd_tfm; +}; + +static inline struct mcryptd_hash_request_ctx + *cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx) +{ + struct ahash_request *areq; + + areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); + return container_of(areq, struct mcryptd_hash_request_ctx, areq); +} + +static inline struct ahash_request + *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) +{ + return container_of((void *) ctx, struct ahash_request, __ctx); +} + +static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, + struct ahash_request *areq) +{ + rctx->flag = HASH_UPDATE; +} + +static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state); +static asmlinkage struct job_sha256* (*sha256_job_mgr_submit) + (struct sha256_mb_mgr *state, struct job_sha256 *job); +static asmlinkage struct job_sha256* (*sha256_job_mgr_flush) + (struct sha256_mb_mgr *state); +static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job) + (struct sha256_mb_mgr *state); + +inline void sha256_init_digest(uint32_t *digest) +{ + static const uint32_t initial_digest[SHA256_DIGEST_LENGTH] = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7}; + memcpy(digest, initial_digest, sizeof(initial_digest)); +} + +inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2], + uint32_t total_len) +{ + uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1); + + memset(&padblock[i], 0, SHA256_BLOCK_SIZE); + padblock[i] = 0x80; + + i += ((SHA256_BLOCK_SIZE - 1) & + (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1))) + + 1 + SHA256_PADLENGTHFIELD_SIZE; + +#if SHA256_PADLENGTHFIELD_SIZE == 16 + *((uint64_t *) &padblock[i - 16]) = 0; +#endif + + *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); + + /* Number of extra blocks to hash */ + return i >> SHA256_LOG2_BLOCK_SIZE; +} + +static struct sha256_hash_ctx + *sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr, + struct sha256_hash_ctx *ctx) +{ + while (ctx) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { + /* Clear PROCESSING bit */ + ctx->status = HASH_CTX_STS_COMPLETE; + return ctx; + } + + /* + * If the extra blocks are empty, begin hashing what remains + * in the user's buffer. + */ + if (ctx->partial_block_buffer_length == 0 && + ctx->incoming_buffer_length) { + + const void *buffer = ctx->incoming_buffer; + uint32_t len = ctx->incoming_buffer_length; + uint32_t copy_len; + + /* + * Only entire blocks can be hashed. + * Copy remainder to extra blocks buffer. 
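+			 *
+			 * Worked example (illustrative numbers, not from
+			 * this patch): with SHA256_BLOCK_SIZE == 64 and
+			 * incoming_buffer_length == 150, copy_len becomes
+			 * 150 & 63 == 22, len drops to 128, two whole
+			 * blocks are handed to the job manager below, and
+			 * the trailing 22 bytes wait in
+			 * partial_block_buffer for the next submit.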
+ */ + copy_len = len & (SHA256_BLOCK_SIZE-1); + + if (copy_len) { + len -= copy_len; + memcpy(ctx->partial_block_buffer, + ((const char *) buffer + len), + copy_len); + ctx->partial_block_buffer_length = copy_len; + } + + ctx->incoming_buffer_length = 0; + + /* len should be a multiple of the block size now */ + assert((len % SHA256_BLOCK_SIZE) == 0); + + /* Set len to the number of blocks to be hashed */ + len >>= SHA256_LOG2_BLOCK_SIZE; + + if (len) { + + ctx->job.buffer = (uint8_t *) buffer; + ctx->job.len = len; + ctx = (struct sha256_hash_ctx *) + sha256_job_mgr_submit(&mgr->mgr, &ctx->job); + continue; + } + } + + /* + * If the extra blocks are not empty, then we are + * either on the last block(s) or we need more + * user input before continuing. + */ + if (ctx->status & HASH_CTX_STS_LAST) { + + uint8_t *buf = ctx->partial_block_buffer; + uint32_t n_extra_blocks = + sha256_pad(buf, ctx->total_length); + + ctx->status = (HASH_CTX_STS_PROCESSING | + HASH_CTX_STS_COMPLETE); + ctx->job.buffer = buf; + ctx->job.len = (uint32_t) n_extra_blocks; + ctx = (struct sha256_hash_ctx *) + sha256_job_mgr_submit(&mgr->mgr, &ctx->job); + continue; + } + + ctx->status = HASH_CTX_STS_IDLE; + return ctx; + } + + return NULL; +} + +static struct sha256_hash_ctx + *sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr) +{ + /* + * If get_comp_job returns NULL, there are no jobs complete. + * If get_comp_job returns a job, verify that it is safe to return to + * the user. If it is not ready, resubmit the job to finish processing. + * If sha256_ctx_mgr_resubmit returned a job, it is ready to be + * returned. Otherwise, all jobs currently being managed by the + * hash_ctx_mgr still need processing. + */ + struct sha256_hash_ctx *ctx; + + ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr); + return sha256_ctx_mgr_resubmit(mgr, ctx); +} + +static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr) +{ + sha256_job_mgr_init(&mgr->mgr); +} + +static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, + struct sha256_hash_ctx *ctx, + const void *buffer, + uint32_t len, + int flags) +{ + if (flags & (~HASH_ENTIRE)) { + /* User should not pass anything other than FIRST, UPDATE + * or LAST + */ + ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; + return ctx; + } + + if (ctx->status & HASH_CTX_STS_PROCESSING) { + /* Cannot submit to a currently processing job. */ + ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; + return ctx; + } + + if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + /* Cannot update a finished job. */ + ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; + return ctx; + } + + if (flags & HASH_FIRST) { + /* Init digest */ + sha256_init_digest(ctx->job.result_digest); + + /* Reset byte counter */ + ctx->total_length = 0; + + /* Clear extra blocks */ + ctx->partial_block_buffer_length = 0; + } + + /* If we made it here, there was no error during this call to submit */ + ctx->error = HASH_CTX_ERROR_NONE; + + /* Store buffer ptr info from user */ + ctx->incoming_buffer = buffer; + ctx->incoming_buffer_length = len; + + /* + * Store the user's request flags and mark this ctx as currently + * being processed. + */ + ctx->status = (flags & HASH_LAST) ? + (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : + HASH_CTX_STS_PROCESSING; + + /* Advance byte counter */ + ctx->total_length += len; + + /* + * If there is anything currently buffered in the extra blocks, + * append to it until it contains a whole block. 
+ * Or if the user's buffer contains less than a whole block, + * append as much as possible to the extra block. + */ + if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) { + /* + * Compute how many bytes to copy from user buffer into + * extra block + */ + uint32_t copy_len = SHA256_BLOCK_SIZE - + ctx->partial_block_buffer_length; + if (len < copy_len) + copy_len = len; + + if (copy_len) { + /* Copy and update relevant pointers and counters */ + memcpy( + &ctx->partial_block_buffer[ctx->partial_block_buffer_length], + buffer, copy_len); + + ctx->partial_block_buffer_length += copy_len; + ctx->incoming_buffer = (const void *) + ((const char *)buffer + copy_len); + ctx->incoming_buffer_length = len - copy_len; + } + + /* The extra block should never contain more than 1 block */ + assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE); + + /* + * If the extra block buffer contains exactly 1 block, + * it can be hashed. + */ + if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) { + ctx->partial_block_buffer_length = 0; + + ctx->job.buffer = ctx->partial_block_buffer; + ctx->job.len = 1; + ctx = (struct sha256_hash_ctx *) + sha256_job_mgr_submit(&mgr->mgr, &ctx->job); + } + } + + return sha256_ctx_mgr_resubmit(mgr, ctx); +} + +static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr) +{ + struct sha256_hash_ctx *ctx; + + while (1) { + ctx = (struct sha256_hash_ctx *) + sha256_job_mgr_flush(&mgr->mgr); + + /* If flush returned 0, there are no more jobs in flight. */ + if (!ctx) + return NULL; + + /* + * If flush returned a job, resubmit the job to finish + * processing. + */ + ctx = sha256_ctx_mgr_resubmit(mgr, ctx); + + /* + * If sha256_ctx_mgr_resubmit returned a job, it is ready to + * be returned. Otherwise, all jobs currently being managed by + * the sha256_ctx_mgr still need processing. Loop. + */ + if (ctx) + return ctx; + } +} + +static int sha256_mb_init(struct ahash_request *areq) +{ + struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); + + hash_ctx_init(sctx); + sctx->job.result_digest[0] = SHA256_H0; + sctx->job.result_digest[1] = SHA256_H1; + sctx->job.result_digest[2] = SHA256_H2; + sctx->job.result_digest[3] = SHA256_H3; + sctx->job.result_digest[4] = SHA256_H4; + sctx->job.result_digest[5] = SHA256_H5; + sctx->job.result_digest[6] = SHA256_H6; + sctx->job.result_digest[7] = SHA256_H7; + sctx->total_length = 0; + sctx->partial_block_buffer_length = 0; + sctx->status = HASH_CTX_STS_IDLE; + + return 0; +} + +static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx) +{ + int i; + struct sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); + __be32 *dst = (__be32 *) rctx->out; + + for (i = 0; i < 8; ++i) + dst[i] = cpu_to_be32(sctx->job.result_digest[i]); + + return 0; +} + +static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, + struct mcryptd_alg_cstate *cstate, bool flush) +{ + int flag = HASH_UPDATE; + int nbytes, err = 0; + struct mcryptd_hash_request_ctx *rctx = *ret_rctx; + struct sha256_hash_ctx *sha_ctx; + + /* more work ? 
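+	 * i.e. keep consuming walk segments until crypto_ahash_walk_last()
+	 * reports the final one; only then is HASH_LAST OR'ed in below so
+	 * the job manager appends the SHA-256 padding block(s)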
*/ + while (!(rctx->flag & HASH_DONE)) { + nbytes = crypto_ahash_walk_done(&rctx->walk, 0); + if (nbytes < 0) { + err = nbytes; + goto out; + } + /* check if the walk is done */ + if (crypto_ahash_walk_last(&rctx->walk)) { + rctx->flag |= HASH_DONE; + if (rctx->flag & HASH_FINAL) + flag |= HASH_LAST; + + } + sha_ctx = (struct sha256_hash_ctx *) + ahash_request_ctx(&rctx->areq); + kernel_fpu_begin(); + sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, + rctx->walk.data, nbytes, flag); + if (!sha_ctx) { + if (flush) + sha_ctx = sha256_ctx_mgr_flush(cstate->mgr); + } + kernel_fpu_end(); + if (sha_ctx) + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + else { + rctx = NULL; + goto out; + } + } + + /* copy the results */ + if (rctx->flag & HASH_FINAL) + sha256_mb_set_results(rctx); + +out: + *ret_rctx = rctx; + return err; +} + +static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, + struct mcryptd_alg_cstate *cstate, + int err) +{ + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha256_hash_ctx *sha_ctx; + struct mcryptd_hash_request_ctx *req_ctx; + int ret; + + /* remove from work list */ + spin_lock(&cstate->work_lock); + list_del(&rctx->waiter); + spin_unlock(&cstate->work_lock); + + if (irqs_disabled()) + rctx->complete(&req->base, err); + else { + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); + } + + /* check to see if there are other jobs that are done */ + sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr); + while (sha_ctx) { + req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&req_ctx, cstate, false); + if (req_ctx) { + spin_lock(&cstate->work_lock); + list_del(&req_ctx->waiter); + spin_unlock(&cstate->work_lock); + + req = cast_mcryptd_ctx_to_req(req_ctx); + if (irqs_disabled()) + rctx->complete(&req->base, ret); + else { + local_bh_disable(); + rctx->complete(&req->base, ret); + local_bh_enable(); + } + } + sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr); + } + + return 0; +} + +static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx, + struct mcryptd_alg_cstate *cstate) +{ + unsigned long next_flush; + unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); + + /* initialize tag */ + rctx->tag.arrival = jiffies; /* tag the arrival time */ + rctx->tag.seq_num = cstate->next_seq_num++; + next_flush = rctx->tag.arrival + delay; + rctx->tag.expire = next_flush; + + spin_lock(&cstate->work_lock); + list_add_tail(&rctx->waiter, &cstate->work_list); + spin_unlock(&cstate->work_lock); + + mcryptd_arm_flusher(cstate, delay); +} + +static int sha256_mb_update(struct ahash_request *areq) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(areq, struct mcryptd_hash_request_ctx, areq); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha256_mb_alg_state.alg_cstate); + + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha256_hash_ctx *sha_ctx; + int ret = 0, nbytes; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, areq); + + nbytes = crypto_ahash_walk_first(req, &rctx->walk); + + if (nbytes < 0) { + ret = nbytes; + goto done; + } + + if (crypto_ahash_walk_last(&rctx->walk)) + rctx->flag |= HASH_DONE; + + /* submit */ + sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); + sha256_mb_add_list(rctx, cstate); + kernel_fpu_begin(); + sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + nbytes, HASH_UPDATE); + 
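+	/*
+	 * The AVX2 job manager touches YMM state, so it is only ever
+	 * entered between kernel_fpu_begin() and kernel_fpu_end();
+	 * sleeping inside the bracket would corrupt the saved user FPU
+	 * context. Illustrative shape of the bracket (cf. sha_finish_walk
+	 * above, which may also flush while still inside it):
+	 *
+	 *	kernel_fpu_begin();
+	 *	sha_ctx = sha256_ctx_mgr_submit(mgr, sha_ctx, data, len, flag);
+	 *	if (!sha_ctx && flush)
+	 *		sha_ctx = sha256_ctx_mgr_flush(mgr);
+	 *	kernel_fpu_end();
+	 */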
kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha256_mb_finup(struct ahash_request *areq) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(areq, struct mcryptd_hash_request_ctx, areq); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha256_mb_alg_state.alg_cstate); + + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha256_hash_ctx *sha_ctx; + int ret = 0, flag = HASH_UPDATE, nbytes; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, areq); + + nbytes = crypto_ahash_walk_first(req, &rctx->walk); + + if (nbytes < 0) { + ret = nbytes; + goto done; + } + + if (crypto_ahash_walk_last(&rctx->walk)) { + rctx->flag |= HASH_DONE; + flag = HASH_LAST; + } + + /* submit */ + rctx->flag |= HASH_FINAL; + sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); + sha256_mb_add_list(rctx, cstate); + + kernel_fpu_begin(); + sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + nbytes, flag); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha256_mb_final(struct ahash_request *areq) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(areq, struct mcryptd_hash_request_ctx, + areq); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha256_mb_alg_state.alg_cstate); + + struct sha256_hash_ctx *sha_ctx; + int ret = 0; + u8 data; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, areq); + + rctx->flag |= HASH_DONE | HASH_FINAL; + + sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); + /* flag HASH_FINAL and 0 data size */ + sha256_mb_add_list(rctx, cstate); + kernel_fpu_begin(); + sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, + HASH_LAST); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha256_mb_export(struct ahash_request *areq, void *out) +{ + struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha256_mb_import(struct ahash_request *areq, const void *in) +{ + struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm) +{ + struct mcryptd_ahash *mcryptd_tfm; + struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); + struct mcryptd_hash_ctx *mctx; + 
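+	/*
+	 * The exposed "sha256" transform is a thin shim over an inner
+	 * mcryptd-wrapped "__intel_sha256-mb" instance: the two
+	 * CRYPTO_ALG_INTERNAL arguments (type and mask) ensure only the
+	 * internal-only variant can be selected, and the reqsize set below
+	 * reserves room so every outer ahash_request carries the inner
+	 * request inline behind it.
+	 */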
+ mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb", + CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(mcryptd_tfm)) + return PTR_ERR(mcryptd_tfm); + mctx = crypto_ahash_ctx(&mcryptd_tfm->base); + mctx->alg_state = &sha256_mb_alg_state; + ctx->mcryptd_tfm = mcryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&mcryptd_tfm->base)); + + return 0; +} + +static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm) +{ + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + sizeof(struct sha256_hash_ctx)); + + return 0; +} + +static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static struct ahash_alg sha256_mb_areq_alg = { + .init = sha256_mb_init, + .update = sha256_mb_update, + .final = sha256_mb_final, + .finup = sha256_mb_finup, + .export = sha256_mb_export, + .import = sha256_mb_import, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_hash_ctx), + .base = { + .cra_name = "__sha256-mb", + .cra_driver_name = "__intel_sha256-mb", + .cra_priority = 100, + /* + * use ASYNC flag as some buffers in multi-buffer + * algo may not have completed before hashing thread + * sleep + */ + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_INTERNAL, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT + (sha256_mb_areq_alg.halg.base.cra_list), + .cra_init = sha256_mb_areq_init_tfm, + .cra_exit = sha256_mb_areq_exit_tfm, + .cra_ctxsize = sizeof(struct sha256_hash_ctx), + } + } +}; + +static int sha256_mb_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_init(mcryptd_req); +} + +static int sha256_mb_async_update(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_update(mcryptd_req); +} + +static int sha256_mb_async_finup(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_finup(mcryptd_req); +} + +static int sha256_mb_async_final(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + 
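+	/*
+	 * Same forwarding idiom as the other async entry points: the whole
+	 * caller request (callback, flags, src/result pointers) is copied
+	 * into the reserved tail area and only ->tfm is retargeted at the
+	 * mcryptd transform before the operation is re-issued.
+	 */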
ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_final(mcryptd_req); +} + +static int sha256_mb_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_digest(mcryptd_req); +} + +static int sha256_mb_async_export(struct ahash_request *req, void *out) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_export(mcryptd_req, out); +} + +static int sha256_mb_async_import(struct ahash_request *req, const void *in) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); + struct mcryptd_hash_request_ctx *rctx; + struct ahash_request *areq; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + rctx = ahash_request_ctx(mcryptd_req); + areq = &rctx->areq; + + ahash_request_set_tfm(areq, child); + ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, + rctx->complete, req); + + return crypto_ahash_import(mcryptd_req, in); +} + +static struct ahash_alg sha256_mb_async_alg = { + .init = sha256_mb_async_init, + .update = sha256_mb_async_update, + .final = sha256_mb_async_final, + .finup = sha256_mb_async_finup, + .export = sha256_mb_async_export, + .import = sha256_mb_async_import, + .digest = sha256_mb_async_digest, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_hash_ctx), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256_mb", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_type = &crypto_ahash_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT + (sha256_mb_async_alg.halg.base.cra_list), + .cra_init = sha256_mb_async_init_tfm, + .cra_exit = sha256_mb_async_exit_tfm, + .cra_ctxsize = sizeof(struct sha256_mb_ctx), + .cra_alignmask = 0, + }, + }, +}; + +static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate) +{ + struct mcryptd_hash_request_ctx *rctx; + unsigned long cur_time; + unsigned long next_flush = 0; + struct sha256_hash_ctx *sha_ctx; + + + cur_time = jiffies; + + while (!list_empty(&cstate->work_list)) { + rctx = list_entry(cstate->work_list.next, + struct mcryptd_hash_request_ctx, waiter); + if (time_before(cur_time, rctx->tag.expire)) + break; + kernel_fpu_begin(); + sha_ctx = (struct sha256_hash_ctx *) + sha256_ctx_mgr_flush(cstate->mgr); + kernel_fpu_end(); + if (!sha_ctx) { + pr_err("sha256_mb error: nothing got" + " flushed for non-empty list\n"); + break; + } + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + sha_finish_walk(&rctx, cstate, true); + sha_complete_job(rctx, cstate, 0); + } + + if (!list_empty(&cstate->work_list)) { + rctx = list_entry(cstate->work_list.next, + struct 
mcryptd_hash_request_ctx, waiter); + /* get the hash context and then flush time */ + next_flush = rctx->tag.expire; + mcryptd_arm_flusher(cstate, get_delay(next_flush)); + } + return next_flush; +} + +static int __init sha256_mb_mod_init(void) +{ + + int cpu; + int err; + struct mcryptd_alg_cstate *cpu_state; + + /* check for dependent cpu features */ + if (!boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_BMI2)) + return -ENODEV; + + /* initialize multibuffer structures */ + sha256_mb_alg_state.alg_cstate = alloc_percpu + (struct mcryptd_alg_cstate); + + sha256_job_mgr_init = sha256_mb_mgr_init_avx2; + sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2; + sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2; + sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2; + + if (!sha256_mb_alg_state.alg_cstate) + return -ENOMEM; + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); + cpu_state->next_flush = 0; + cpu_state->next_seq_num = 0; + cpu_state->flusher_engaged = false; + INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); + cpu_state->cpu = cpu; + cpu_state->alg_state = &sha256_mb_alg_state; + cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr), + GFP_KERNEL); + if (!cpu_state->mgr) + goto err2; + sha256_ctx_mgr_init(cpu_state->mgr); + INIT_LIST_HEAD(&cpu_state->work_list); + spin_lock_init(&cpu_state->work_lock); + } + sha256_mb_alg_state.flusher = &sha256_mb_flusher; + + err = crypto_register_ahash(&sha256_mb_areq_alg); + if (err) + goto err2; + err = crypto_register_ahash(&sha256_mb_async_alg); + if (err) + goto err1; + + + return 0; +err1: + crypto_unregister_ahash(&sha256_mb_areq_alg); +err2: + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); + kfree(cpu_state->mgr); + } + free_percpu(sha256_mb_alg_state.alg_cstate); + return -ENODEV; +} + +static void __exit sha256_mb_mod_fini(void) +{ + int cpu; + struct mcryptd_alg_cstate *cpu_state; + + crypto_unregister_ahash(&sha256_mb_async_alg); + crypto_unregister_ahash(&sha256_mb_areq_alg); + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); + kfree(cpu_state->mgr); + } + free_percpu(sha256_mb_alg_state.alg_cstate); +} + +module_init(sha256_mb_mod_init); +module_exit(sha256_mb_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated"); + +MODULE_ALIAS_CRYPTO("sha256"); diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h new file mode 100644 index 000000000000..edd252b73206 --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h @@ -0,0 +1,136 @@ +/* + * Header file for multi buffer SHA256 context + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SHA_MB_CTX_INTERNAL_H +#define _SHA_MB_CTX_INTERNAL_H + +#include "sha256_mb_mgr.h" + +#define HASH_UPDATE 0x00 +#define HASH_FIRST 0x01 +#define HASH_LAST 0x02 +#define HASH_ENTIRE 0x03 +#define HASH_DONE 0x04 +#define HASH_FINAL 0x08 + +#define HASH_CTX_STS_IDLE 0x00 +#define HASH_CTX_STS_PROCESSING 0x01 +#define HASH_CTX_STS_LAST 0x02 +#define HASH_CTX_STS_COMPLETE 0x04 + +enum hash_ctx_error { + HASH_CTX_ERROR_NONE = 0, + HASH_CTX_ERROR_INVALID_FLAGS = -1, + HASH_CTX_ERROR_ALREADY_PROCESSING = -2, + HASH_CTX_ERROR_ALREADY_COMPLETED = -3, + +#ifdef HASH_CTX_DEBUG + HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4, +#endif +}; + + +#define hash_ctx_user_data(ctx) ((ctx)->user_data) +#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) +#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) +#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) +#define hash_ctx_status(ctx) ((ctx)->status) +#define hash_ctx_error(ctx) ((ctx)->error) +#define hash_ctx_init(ctx) \ + do { \ + (ctx)->error = HASH_CTX_ERROR_NONE; \ + (ctx)->status = HASH_CTX_STS_COMPLETE; \ + } while (0) + + +/* Hash Constants and Typedefs */ +#define SHA256_DIGEST_LENGTH 8 +#define SHA256_LOG2_BLOCK_SIZE 6 + +#define SHA256_PADLENGTHFIELD_SIZE 8 + +#ifdef SHA_MB_DEBUG +#define assert(expr) \ +do { \ + if (unlikely(!(expr))) { \ + printk(KERN_ERR "Assertion failed! 
%s,%s,%s,line=%d\n", \ + #expr, __FILE__, __func__, __LINE__); \ + } \ +} while (0) +#else +#define assert(expr) do {} while (0) +#endif + +struct sha256_ctx_mgr { + struct sha256_mb_mgr mgr; +}; + +/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */ + +struct sha256_hash_ctx { + /* Must be at struct offset 0 */ + struct job_sha256 job; + /* status flag */ + int status; + /* error flag */ + int error; + + uint32_t total_length; + const void *incoming_buffer; + uint32_t incoming_buffer_length; + uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2]; + uint32_t partial_block_buffer_length; + void *user_data; +}; + +#endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h new file mode 100644 index 000000000000..b01ae408c56d --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h @@ -0,0 +1,108 @@ +/* + * Header file for multi buffer SHA256 algorithm manager + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef __SHA_MB_MGR_H +#define __SHA_MB_MGR_H + +#include + +#define NUM_SHA256_DIGEST_WORDS 8 + +enum job_sts { STS_UNKNOWN = 0, + STS_BEING_PROCESSED = 1, + STS_COMPLETED = 2, + STS_INTERNAL_ERROR = 3, + STS_ERROR = 4 +}; + +struct job_sha256 { + u8 *buffer; + u32 len; + u32 result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32); + enum job_sts status; + void *user_data; +}; + +/* SHA256 out-of-order scheduler */ + +/* typedef uint32_t sha8_digest_array[8][8]; */ + +struct sha256_args_x8 { + uint32_t digest[8][8]; + uint8_t *data_ptr[8]; +}; + +struct sha256_lane_data { + struct job_sha256 *job_in_lane; +}; + +struct sha256_mb_mgr { + struct sha256_args_x8 args; + + uint32_t lens[8]; + + /* each byte is index (0...7) of unused lanes */ + uint64_t unused_lanes; + /* byte 4 is set to FF as a flag */ + struct sha256_lane_data ldata[8]; +}; + + +#define SHA256_MB_MGR_NUM_LANES_AVX2 8 + +void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state); +struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state, + struct job_sha256 *job); +struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state); +struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state); + +#endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S new file mode 100644 index 000000000000..5c377bac21d0 --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S @@ -0,0 +1,304 @@ +/* + * Header file for multi buffer SHA256 algorithm data structure + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +# Macros for defining data structures + +# Usage example + +#START_FIELDS # JOB_AES +### name size align +#FIELD _plaintext, 8, 8 # pointer to plaintext +#FIELD _ciphertext, 8, 8 # pointer to ciphertext +#FIELD _IV, 16, 8 # IV +#FIELD _keys, 8, 8 # pointer to keys +#FIELD _len, 4, 4 # length in bytes +#FIELD _status, 4, 4 # status enumeration +#FIELD _user_data, 8, 8 # pointer to user data +#UNION _union, size1, align1, \ +# size2, align2, \ +# size3, align3, \ +# ... +#END_FIELDS +#%assign _JOB_AES_size _FIELD_OFFSET +#%assign _JOB_AES_align _STRUCT_ALIGN + +######################################################################### + +# Alternate "struc-like" syntax: +# STRUCT job_aes2 +# RES_Q .plaintext, 1 +# RES_Q .ciphertext, 1 +# RES_DQ .IV, 1 +# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN +# RES_U .union, size1, align1, \ +# size2, align2, \ +# ... +# ENDSTRUCT +# # Following only needed if nesting +# %assign job_aes2_size _FIELD_OFFSET +# %assign job_aes2_align _STRUCT_ALIGN +# +# RES_* macros take a name, a count and an optional alignment. +# The count in in terms of the base size of the macro, and the +# default alignment is the base size. +# The macros are: +# Macro Base size +# RES_B 1 +# RES_W 2 +# RES_D 4 +# RES_Q 8 +# RES_DQ 16 +# RES_Y 32 +# RES_Z 64 +# +# RES_U defines a union. It's arguments are a name and two or more +# pairs of "size, alignment" +# +# The two assigns are only needed if this structure is being nested +# within another. Even if the assigns are not done, one can still use +# STRUCT_NAME_size as the size of the structure. +# +# Note that for nesting, you still need to assign to STRUCT_NAME_size. +# +# The differences between this and using "struc" directly are that each +# type is implicitly aligned to its natural length (although this can be +# over-ridden with an explicit third parameter), and that the structure +# is padded at the end to its overall alignment. 
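+#
+# Worked example of the offset arithmetic (illustrative field names only):
+# starting from _FIELD_OFFSET = 0,
+#
+#	FIELD _a, 8, 8		# _a = 0,  _FIELD_OFFSET -> 8
+#	FIELD _b, 4, 4		# _b = 8,  _FIELD_OFFSET -> 12
+#	FIELD _c, 8, 8		# 12 rounds up, so _c = 16, -> 24
+#
+# each offset is first rounded up to the field's alignment, and
+# _STRUCT_ALIGN keeps the largest alignment seen so END_FIELDS can pad
+# the total size to a multiple of it.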
+# + +######################################################################### + +#ifndef _DATASTRUCT_ASM_ +#define _DATASTRUCT_ASM_ + +#define SZ8 8*SHA256_DIGEST_WORD_SIZE +#define ROUNDS 64*SZ8 +#define PTR_SZ 8 +#define SHA256_DIGEST_WORD_SIZE 4 +#define MAX_SHA256_LANES 8 +#define SHA256_DIGEST_WORDS 8 +#define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE) +#define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS) +#define SHA256_BLK_SZ 64 + +# START_FIELDS +.macro START_FIELDS + _FIELD_OFFSET = 0 + _STRUCT_ALIGN = 0 +.endm + +# FIELD name size align +.macro FIELD name size align + _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) + \name = _FIELD_OFFSET + _FIELD_OFFSET = _FIELD_OFFSET + (\size) +.if (\align > _STRUCT_ALIGN) + _STRUCT_ALIGN = \align +.endif +.endm + +# END_FIELDS +.macro END_FIELDS + _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) +.endm + +######################################################################## + +.macro STRUCT p1 +START_FIELDS +.struc \p1 +.endm + +.macro ENDSTRUCT + tmp = _FIELD_OFFSET + END_FIELDS + tmp = (_FIELD_OFFSET - %%tmp) +.if (tmp > 0) + .lcomm tmp +.endif +.endstruc +.endm + +## RES_int name size align +.macro RES_int p1 p2 p3 + name = \p1 + size = \p2 + align = .\p3 + + _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) +.align align +.lcomm name size + _FIELD_OFFSET = _FIELD_OFFSET + (size) +.if (align > _STRUCT_ALIGN) + _STRUCT_ALIGN = align +.endif +.endm + +# macro RES_B name, size [, align] +.macro RES_B _name, _size, _align=1 +RES_int _name _size _align +.endm + +# macro RES_W name, size [, align] +.macro RES_W _name, _size, _align=2 +RES_int _name 2*(_size) _align +.endm + +# macro RES_D name, size [, align] +.macro RES_D _name, _size, _align=4 +RES_int _name 4*(_size) _align +.endm + +# macro RES_Q name, size [, align] +.macro RES_Q _name, _size, _align=8 +RES_int _name 8*(_size) _align +.endm + +# macro RES_DQ name, size [, align] +.macro RES_DQ _name, _size, _align=16 +RES_int _name 16*(_size) _align +.endm + +# macro RES_Y name, size [, align] +.macro RES_Y _name, _size, _align=32 +RES_int _name 32*(_size) _align +.endm + +# macro RES_Z name, size [, align] +.macro RES_Z _name, _size, _align=64 +RES_int _name 64*(_size) _align +.endm + +#endif + + +######################################################################## +#### Define SHA256 Out Of Order Data Structures +######################################################################## + +START_FIELDS # LANE_DATA +### name size align +FIELD _job_in_lane, 8, 8 # pointer to job object +END_FIELDS + + _LANE_DATA_size = _FIELD_OFFSET + _LANE_DATA_align = _STRUCT_ALIGN + +######################################################################## + +START_FIELDS # SHA256_ARGS_X4 +### name size align +FIELD _digest, 4*8*8, 4 # transposed digest +FIELD _data_ptr, 8*8, 8 # array of pointers to data +END_FIELDS + + _SHA256_ARGS_X4_size = _FIELD_OFFSET + _SHA256_ARGS_X4_align = _STRUCT_ALIGN + _SHA256_ARGS_X8_size = _FIELD_OFFSET + _SHA256_ARGS_X8_align = _STRUCT_ALIGN + +####################################################################### + +START_FIELDS # MB_MGR +### name size align +FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align +FIELD _lens, 4*8, 8 +FIELD _unused_lanes, 8, 8 +FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align +END_FIELDS + + _MB_MGR_size = _FIELD_OFFSET + _MB_MGR_align = _STRUCT_ALIGN + +_args_digest = _args + _digest +_args_data_ptr = _args + _data_ptr + 
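+# With the sizes above (illustrative arithmetic, not new definitions):
+# _digest is 4*8*8 = 256 bytes of transposed digest state and _data_ptr
+# is 8*8 = 64 bytes of lane pointers, so _lens lands at offset 320,
+# _unused_lanes at 352 and _ldata at 360, matching struct sha256_mb_mgr
+# on the C side. Lane i's digest word w sits at _args_digest + 4*i + 32*w,
+# and each _lens entry packs (blocks << 4) | lane, which is what lets the
+# submit/flush code take vpminud across all eight lanes and then split
+# the minimum with "and $0xF" / "shr $4".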
+####################################################################### + +START_FIELDS #STACK_FRAME +### name size align +FIELD _data, 16*SZ8, 1 # transposed digest +FIELD _digest, 8*SZ8, 1 # array of pointers to data +FIELD _ytmp, 4*SZ8, 1 +FIELD _rsp, 8, 1 +END_FIELDS + + _STACK_FRAME_size = _FIELD_OFFSET + _STACK_FRAME_align = _STRUCT_ALIGN + +####################################################################### + +######################################################################## +#### Define constants +######################################################################## + +#define STS_UNKNOWN 0 +#define STS_BEING_PROCESSED 1 +#define STS_COMPLETED 2 + +######################################################################## +#### Define JOB_SHA256 structure +######################################################################## + +START_FIELDS # JOB_SHA256 + +### name size align +FIELD _buffer, 8, 8 # pointer to buffer +FIELD _len, 8, 8 # length in bytes +FIELD _result_digest, 8*4, 32 # Digest (output) +FIELD _status, 4, 4 +FIELD _user_data, 8, 8 +END_FIELDS + + _JOB_SHA256_size = _FIELD_OFFSET + _JOB_SHA256_align = _STRUCT_ALIGN diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S new file mode 100644 index 000000000000..b691da981cd9 --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S @@ -0,0 +1,304 @@ +/* + * Flush routine for SHA256 multibuffer + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include "sha256_mb_mgr_datastruct.S" + +.extern sha256_x8_avx2 + +#LINUX register definitions +#define arg1 %rdi +#define arg2 %rsi + +# Common register definitions +#define state arg1 +#define job arg2 +#define len2 arg2 + +# idx must be a register not clobberred by sha1_mult +#define idx %r8 +#define DWORD_idx %r8d + +#define unused_lanes %rbx +#define lane_data %rbx +#define tmp2 %rbx +#define tmp2_w %ebx + +#define job_rax %rax +#define tmp1 %rax +#define size_offset %rax +#define tmp %rax +#define start_offset %rax + +#define tmp3 %arg1 + +#define extra_blocks %arg2 +#define p %arg2 + +.macro LABEL prefix n +\prefix\n\(): +.endm + +.macro JNE_SKIP i +jne skip_\i +.endm + +.altmacro +.macro SET_OFFSET _offset +offset = \_offset +.endm +.noaltmacro + +# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state) +# arg 1 : rcx : state +ENTRY(sha256_mb_mgr_flush_avx2) + FRAME_BEGIN + push %rbx + + # If bit (32+3) is set, then all lanes are empty + mov _unused_lanes(state), unused_lanes + bt $32+3, unused_lanes + jc return_null + + # find a lane with a non-null job + xor idx, idx + offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne one(%rip), idx + offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne two(%rip), idx + offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne three(%rip), idx + offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne four(%rip), idx + offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne five(%rip), idx + offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne six(%rip), idx + offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne seven(%rip), idx + + # copy idx to empty lanes +copy_lane_data: + offset = (_args + _data_ptr) + mov offset(state,idx,8), tmp + + I = 0 +.rep 8 + offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) +.altmacro + JNE_SKIP %I + offset = (_args + _data_ptr + 8*I) + mov tmp, offset(state) + offset = (_lens + 4*I) + movl $0xFFFFFFFF, offset(state) +LABEL skip_ %I + I = (I+1) +.noaltmacro +.endr + + # Find min length + vmovdqa _lens+0*16(state), %xmm0 + vmovdqa _lens+1*16(state), %xmm1 + + vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} + vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} + vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword + + vmovd %xmm2, DWORD_idx + mov idx, len2 + and $0xF, idx + shr $4, len2 + jz len_is_0 + + vpand clear_low_nibble(%rip), %xmm2, %xmm2 + vpshufd $0, %xmm2, %xmm2 + + vpsubd %xmm2, %xmm0, %xmm0 + vpsubd %xmm2, %xmm1, %xmm1 + + vmovdqa %xmm0, _lens+0*16(state) + vmovdqa %xmm1, _lens+1*16(state) + + # "state" and "args" are the same address, arg1 + # len is arg2 + call 
sha256_x8_avx2 + # state and idx are intact + +len_is_0: + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + mov _unused_lanes(state), unused_lanes + shl $4, unused_lanes + or idx, unused_lanes + + mov unused_lanes, _unused_lanes(state) + movl $0xFFFFFFFF, _lens(state,idx,4) + + vmovd _args_digest(state , idx, 4) , %xmm0 + vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 + vmovd _args_digest+4*32(state, idx, 4), %xmm1 + vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 + vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 + vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 + + vmovdqu %xmm0, _result_digest(job_rax) + offset = (_result_digest + 1*16) + vmovdqu %xmm1, offset(job_rax) + +return: + pop %rbx + FRAME_END + ret + +return_null: + xor job_rax, job_rax + jmp return +ENDPROC(sha256_mb_mgr_flush_avx2) + +############################################################################## + +.align 16 +ENTRY(sha256_mb_mgr_get_comp_job_avx2) + push %rbx + + ## if bit 32+3 is set, then all lanes are empty + mov _unused_lanes(state), unused_lanes + bt $(32+3), unused_lanes + jc .return_null + + # Find min length + vmovdqa _lens(state), %xmm0 + vmovdqa _lens+1*16(state), %xmm1 + + vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} + vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} + vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword + + vmovd %xmm2, DWORD_idx + test $~0xF, idx + jnz .return_null + + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + mov _unused_lanes(state), unused_lanes + shl $4, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF, _lens(state, idx, 4) + + vmovd _args_digest(state, idx, 4), %xmm0 + vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 + movl _args_digest+4*32(state, idx, 4), tmp2_w + vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 + vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 + vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 + + vmovdqu %xmm0, _result_digest(job_rax) + movl tmp2_w, _result_digest+1*16(job_rax) + + pop %rbx + + ret + +.return_null: + xor job_rax, job_rax + pop %rbx + ret +ENDPROC(sha256_mb_mgr_get_comp_job_avx2) + +.data + +.align 16 +clear_low_nibble: +.octa 0x000000000000000000000000FFFFFFF0 +one: +.quad 1 +two: +.quad 2 +three: +.quad 3 +four: +.quad 4 +five: +.quad 5 +six: +.quad 6 +seven: +.quad 7 diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c new file mode 100644 index 000000000000..b0c498371e67 --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c @@ -0,0 +1,65 @@ +/* + * Initialization code for multi buffer SHA256 algorithm for AVX2 + * + * This file is provided under a dual BSD/GPLv2 license. 
When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sha256_mb_mgr.h" + +void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state) +{ + unsigned int j; + + state->unused_lanes = 0xF76543210ULL; + for (j = 0; j < 8; j++) { + state->lens[j] = 0xFFFFFFFF; + state->ldata[j].job_in_lane = NULL; + } +} diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S new file mode 100644 index 000000000000..7ea670e25acc --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S @@ -0,0 +1,215 @@ +/* + * Buffer submit code for multi buffer SHA256 algorithm + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ *
+ * Contact Information:
+ * Megha Dey
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+#include "sha256_mb_mgr_datastruct.S"
+
+.extern sha256_x8_avx2
+
+# LINUX register definitions
+arg1 = %rdi
+arg2 = %rsi
+size_offset = %rcx
+tmp2 = %rcx
+extra_blocks = %rdx
+
+# Common definitions
+#define state arg1
+#define job %rsi
+#define len2 arg2
+#define p2 arg2
+
+# idx must be a register not clobbered by sha256_x8_avx2
+idx = %r8
+DWORD_idx = %r8d
+last_len = %r8
+
+p = %r11
+start_offset = %r11
+
+unused_lanes = %rbx
+BYTE_unused_lanes = %bl
+
+job_rax = %rax
+len = %rax
+DWORD_len = %eax
+
+lane = %r12
+tmp3 = %r12
+
+tmp = %r9
+DWORD_tmp = %r9d
+
+lane_data = %r10
+
+# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job)
+# arg 1 : rdi : state
+# arg 2 : rsi : job
+ENTRY(sha256_mb_mgr_submit_avx2)
+ FRAME_BEGIN
+ push %rbx
+ push %r12
+
+ mov _unused_lanes(state), unused_lanes
+ mov unused_lanes, lane
+ and $0xF, lane
+ shr $4, unused_lanes
+ imul $_LANE_DATA_size, lane, lane_data
+ movl $STS_BEING_PROCESSED, _status(job)
+ lea _ldata(state, lane_data), lane_data
+ mov unused_lanes, _unused_lanes(state)
+ movl _len(job), DWORD_len
+
+ mov job, _job_in_lane(lane_data)
+ shl $4, len
+ or lane, len
+
+ movl DWORD_len, _lens(state , lane, 4)
+
+ # Load digest words from result_digest
+ vmovdqu _result_digest(job), %xmm0
+ vmovdqu _result_digest+1*16(job), %xmm1
+ vmovd %xmm0, _args_digest(state, lane, 4)
+ vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
+ vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
+ vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
+ vmovd %xmm1, _args_digest+4*32(state , lane, 4)
+
+ vpextrd $1, %xmm1, _args_digest+5*32(state , lane, 4)
+ vpextrd $2, %xmm1, _args_digest+6*32(state , lane, 4)
+ vpextrd $3, %xmm1, _args_digest+7*32(state , lane, 4)
+
+ mov _buffer(job), p
+ mov p, _args_data_ptr(state, lane, 8)
+
+ cmp $0xF, unused_lanes
+ jne return_null
+
+start_loop:
+ # Find min length
+ vmovdqa
_lens(state), %xmm0 + vmovdqa _lens+1*16(state), %xmm1 + + vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} + vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} + vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword + + vmovd %xmm2, DWORD_idx + mov idx, len2 + and $0xF, idx + shr $4, len2 + jz len_is_0 + + vpand clear_low_nibble(%rip), %xmm2, %xmm2 + vpshufd $0, %xmm2, %xmm2 + + vpsubd %xmm2, %xmm0, %xmm0 + vpsubd %xmm2, %xmm1, %xmm1 + + vmovdqa %xmm0, _lens + 0*16(state) + vmovdqa %xmm1, _lens + 1*16(state) + + # "state" and "args" are the same address, arg1 + # len is arg2 + call sha256_x8_avx2 + + # state and idx are intact + +len_is_0: + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + mov _unused_lanes(state), unused_lanes + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + shl $4, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF, _lens(state,idx,4) + + vmovd _args_digest(state, idx, 4), %xmm0 + vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 + vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 + vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 + vmovd _args_digest+4*32(state, idx, 4), %xmm1 + + vpinsrd $1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1 + vpinsrd $2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1 + vpinsrd $3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1 + + vmovdqu %xmm0, _result_digest(job_rax) + vmovdqu %xmm1, _result_digest+1*16(job_rax) + +return: + pop %r12 + pop %rbx + FRAME_END + ret + +return_null: + xor job_rax, job_rax + jmp return + +ENDPROC(sha256_mb_mgr_submit_avx2) + +.data + +.align 16 +clear_low_nibble: + .octa 0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S new file mode 100644 index 000000000000..aa21aea4c722 --- /dev/null +++ b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S @@ -0,0 +1,593 @@ +/* + * Multi-buffer SHA256 algorithm hash compute routine + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/linkage.h>
+#include "sha256_mb_mgr_datastruct.S"
+
+## code to compute oct SHA256 using SSE-256
+## outer calling routine takes care of save and restore of XMM registers
+## Logic designed/laid out by JDG
+
+## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15
+## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
+## Linux preserves: rdi rbp r8
+##
+## clobbers %ymm0-15
+
+arg1 = %rdi
+arg2 = %rsi
+reg3 = %rcx
+reg4 = %rdx
+
+# Common definitions
+STATE = arg1
+INP_SIZE = arg2
+
+IDX = %rax
+ROUND = %rbx
+TBL = reg3
+
+inp0 = %r9
+inp1 = %r10
+inp2 = %r11
+inp3 = %r12
+inp4 = %r13
+inp5 = %r14
+inp6 = %r15
+inp7 = reg4
+
+a = %ymm0
+b = %ymm1
+c = %ymm2
+d = %ymm3
+e = %ymm4
+f = %ymm5
+g = %ymm6
+h = %ymm7
+
+T1 = %ymm8
+
+a0 = %ymm12
+a1 = %ymm13
+a2 = %ymm14
+TMP = %ymm15
+TMP0 = %ymm6
+TMP1 = %ymm7
+
+TT0 = %ymm8
+TT1 = %ymm9
+TT2 = %ymm10
+TT3 = %ymm11
+TT4 = %ymm12
+TT5 = %ymm13
+TT6 = %ymm14
+TT7 = %ymm15
+
+# Define stack usage
+
+# Assume stack aligned to 32 bytes before call
+# Therefore FRAMESZ mod 32 must be 32-8 = 24
+
+#define FRAMESZ 0x388
+
+#define VMOVPS vmovups
+
+# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
+# "transpose" data in {r0...r7} using temps {t0...t1}
+# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
+# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
+# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
+# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
+# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
+# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
+# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
+# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
+# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
+#
+# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
+# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
+# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
+# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
+# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
+# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
+# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
+# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
+# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
+#
+
+.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
+ # process top half (r0..r3) {a...d}
+ vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
+ vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
+ vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
+ vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
+ vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
+ vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
+ vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
+ vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
+
+ # use r2 in place of t0
+ # process bottom half (r4..r7)
{e...h} + vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} + vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} + vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} + vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} + vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} + vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} + vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} + vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} + + vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 + vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 + vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 + vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 + vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 + vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 + vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 + vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 + +.endm + +.macro ROTATE_ARGS +TMP_ = h +h = g +g = f +f = e +e = d +d = c +c = b +b = a +a = TMP_ +.endm + +.macro _PRORD reg imm tmp + vpslld $(32-\imm),\reg,\tmp + vpsrld $\imm,\reg, \reg + vpor \tmp,\reg, \reg +.endm + +# PRORD_nd reg, imm, tmp, src +.macro _PRORD_nd reg imm tmp src + vpslld $(32-\imm), \src, \tmp + vpsrld $\imm, \src, \reg + vpor \tmp, \reg, \reg +.endm + +# PRORD dst/src, amt +.macro PRORD reg imm + _PRORD \reg,\imm,TMP +.endm + +# PRORD_nd dst, src, amt +.macro PRORD_nd reg tmp imm + _PRORD_nd \reg, \imm, TMP, \tmp +.endm + +# arguments passed implicitly in preprocessor symbols i, a...h +.macro ROUND_00_15 _T1 i + PRORD_nd a0,e,5 # sig1: a0 = (e >> 5) + + vpxor g, f, a2 # ch: a2 = f^g + vpand e,a2, a2 # ch: a2 = (f^g)&e + vpxor g, a2, a2 # a2 = ch + + PRORD_nd a1,e,25 # sig1: a1 = (e >> 25) + + vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp) + vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K + vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) + PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11) + vpaddd a2, h, h # h = h + ch + PRORD_nd a2,a,11 # sig0: a2 = (a >> 11) + vpaddd \_T1,h, h # h = h + ch + W + K + vpxor a1, a0, a0 # a0 = sigma1 + PRORD_nd a1,a,22 # sig0: a1 = (a >> 22) + vpxor c, a, \_T1 # maj: T1 = a^c + add $SZ8, ROUND # ROUND++ + vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b + vpaddd a0, h, h + vpaddd h, d, d + vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) + PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13) + vpxor a1, a2, a2 # a2 = sig0 + vpand c, a, a1 # maj: a1 = a&c + vpor \_T1, a1, a1 # a1 = maj + vpaddd a1, h, h # h = h + ch + W + K + maj + vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0 + ROTATE_ARGS +.endm + +# arguments passed implicitly in preprocessor symbols i, a...h +.macro ROUND_16_XX _T1 i + vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1 + vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1 + vmovdqu \_T1, a0 + PRORD \_T1,11 + vmovdqu a1, a2 + PRORD a1,2 + vpxor a0, \_T1, \_T1 + PRORD \_T1, 7 + vpxor a2, a1, a1 + PRORD a1, 17 + vpsrld $3, a0, a0 + vpxor a0, \_T1, \_T1 + vpsrld $10, a2, a2 + vpxor a2, a1, a1 + vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1 + vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, a1 + vpaddd a1, \_T1, \_T1 + + ROUND_00_15 \_T1,\i +.endm + +# SHA256_ARGS: +# UINT128 digest[8]; // transposed digests +# UINT8 *data_ptr[4]; + +# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes); +# arg 1 : STATE : pointer to array of pointers to input data +# arg 2 : INP_SIZE : size of input in blocks + # general registers preserved in outer calling routine + # outer calling routine saves all the XMM registers + # save rsp, allocate 32-byte aligned for local variables +ENTRY(sha256_x8_avx2) + + # save callee-saved clobbered registers to comply 
with C function ABI + push %r12 + push %r13 + push %r14 + push %r15 + + mov %rsp, IDX + sub $FRAMESZ, %rsp + and $~0x1F, %rsp + mov IDX, _rsp(%rsp) + + # Load the pre-transposed incoming digest. + vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a + vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b + vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c + vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d + vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e + vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f + vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g + vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h + + lea K256_8(%rip),TBL + + # load the address of each of the 4 message lanes + # getting ready to transpose input onto stack + mov _args_data_ptr+0*PTR_SZ(STATE),inp0 + mov _args_data_ptr+1*PTR_SZ(STATE),inp1 + mov _args_data_ptr+2*PTR_SZ(STATE),inp2 + mov _args_data_ptr+3*PTR_SZ(STATE),inp3 + mov _args_data_ptr+4*PTR_SZ(STATE),inp4 + mov _args_data_ptr+5*PTR_SZ(STATE),inp5 + mov _args_data_ptr+6*PTR_SZ(STATE),inp6 + mov _args_data_ptr+7*PTR_SZ(STATE),inp7 + + xor IDX, IDX +lloop: + xor ROUND, ROUND + + # save old digest + vmovdqu a, _digest(%rsp) + vmovdqu b, _digest+1*SZ8(%rsp) + vmovdqu c, _digest+2*SZ8(%rsp) + vmovdqu d, _digest+3*SZ8(%rsp) + vmovdqu e, _digest+4*SZ8(%rsp) + vmovdqu f, _digest+5*SZ8(%rsp) + vmovdqu g, _digest+6*SZ8(%rsp) + vmovdqu h, _digest+7*SZ8(%rsp) + i = 0 +.rep 2 + VMOVPS i*32(inp0, IDX), TT0 + VMOVPS i*32(inp1, IDX), TT1 + VMOVPS i*32(inp2, IDX), TT2 + VMOVPS i*32(inp3, IDX), TT3 + VMOVPS i*32(inp4, IDX), TT4 + VMOVPS i*32(inp5, IDX), TT5 + VMOVPS i*32(inp6, IDX), TT6 + VMOVPS i*32(inp7, IDX), TT7 + vmovdqu g, _ytmp(%rsp) + vmovdqu h, _ytmp+1*SZ8(%rsp) + TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1 + vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1 + vmovdqu _ytmp(%rsp), g + vpshufb TMP1, TT0, TT0 + vpshufb TMP1, TT1, TT1 + vpshufb TMP1, TT2, TT2 + vpshufb TMP1, TT3, TT3 + vpshufb TMP1, TT4, TT4 + vpshufb TMP1, TT5, TT5 + vpshufb TMP1, TT6, TT6 + vpshufb TMP1, TT7, TT7 + vmovdqu _ytmp+1*SZ8(%rsp), h + vmovdqu TT4, _ytmp(%rsp) + vmovdqu TT5, _ytmp+1*SZ8(%rsp) + vmovdqu TT6, _ytmp+2*SZ8(%rsp) + vmovdqu TT7, _ytmp+3*SZ8(%rsp) + ROUND_00_15 TT0,(i*8+0) + vmovdqu _ytmp(%rsp), TT0 + ROUND_00_15 TT1,(i*8+1) + vmovdqu _ytmp+1*SZ8(%rsp), TT1 + ROUND_00_15 TT2,(i*8+2) + vmovdqu _ytmp+2*SZ8(%rsp), TT2 + ROUND_00_15 TT3,(i*8+3) + vmovdqu _ytmp+3*SZ8(%rsp), TT3 + ROUND_00_15 TT0,(i*8+4) + ROUND_00_15 TT1,(i*8+5) + ROUND_00_15 TT2,(i*8+6) + ROUND_00_15 TT3,(i*8+7) + i = (i+1) +.endr + add $64, IDX + i = (i*8) + + jmp Lrounds_16_xx +.align 16 +Lrounds_16_xx: +.rep 16 + ROUND_16_XX T1, i + i = (i+1) +.endr + + cmp $ROUNDS,ROUND + jb Lrounds_16_xx + + # add old digest + vpaddd _digest+0*SZ8(%rsp), a, a + vpaddd _digest+1*SZ8(%rsp), b, b + vpaddd _digest+2*SZ8(%rsp), c, c + vpaddd _digest+3*SZ8(%rsp), d, d + vpaddd _digest+4*SZ8(%rsp), e, e + vpaddd _digest+5*SZ8(%rsp), f, f + vpaddd _digest+6*SZ8(%rsp), g, g + vpaddd _digest+7*SZ8(%rsp), h, h + + sub $1, INP_SIZE # unit is blocks + jne lloop + + # write back to memory (state object) the transposed digest + vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE) + vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE) + + # update input pointers + add IDX, inp0 + mov inp0, _args_data_ptr+0*8(STATE) + add IDX, inp1 + mov inp1, 
_args_data_ptr+1*8(STATE) + add IDX, inp2 + mov inp2, _args_data_ptr+2*8(STATE) + add IDX, inp3 + mov inp3, _args_data_ptr+3*8(STATE) + add IDX, inp4 + mov inp4, _args_data_ptr+4*8(STATE) + add IDX, inp5 + mov inp5, _args_data_ptr+5*8(STATE) + add IDX, inp6 + mov inp6, _args_data_ptr+6*8(STATE) + add IDX, inp7 + mov inp7, _args_data_ptr+7*8(STATE) + + # Postamble + mov _rsp(%rsp), %rsp + + # restore callee-saved clobbered registers + pop %r15 + pop %r14 + pop %r13 + pop %r12 + + ret +ENDPROC(sha256_x8_avx2) +.data +.align 64 +K256_8: + .octa 0x428a2f98428a2f98428a2f98428a2f98 + .octa 0x428a2f98428a2f98428a2f98428a2f98 + .octa 0x71374491713744917137449171374491 + .octa 0x71374491713744917137449171374491 + .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf + .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf + .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 + .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 + .octa 0x3956c25b3956c25b3956c25b3956c25b + .octa 0x3956c25b3956c25b3956c25b3956c25b + .octa 0x59f111f159f111f159f111f159f111f1 + .octa 0x59f111f159f111f159f111f159f111f1 + .octa 0x923f82a4923f82a4923f82a4923f82a4 + .octa 0x923f82a4923f82a4923f82a4923f82a4 + .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 + .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 + .octa 0xd807aa98d807aa98d807aa98d807aa98 + .octa 0xd807aa98d807aa98d807aa98d807aa98 + .octa 0x12835b0112835b0112835b0112835b01 + .octa 0x12835b0112835b0112835b0112835b01 + .octa 0x243185be243185be243185be243185be + .octa 0x243185be243185be243185be243185be + .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 + .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 + .octa 0x72be5d7472be5d7472be5d7472be5d74 + .octa 0x72be5d7472be5d7472be5d7472be5d74 + .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe + .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe + .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 + .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 + .octa 0xc19bf174c19bf174c19bf174c19bf174 + .octa 0xc19bf174c19bf174c19bf174c19bf174 + .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 + .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 + .octa 0xefbe4786efbe4786efbe4786efbe4786 + .octa 0xefbe4786efbe4786efbe4786efbe4786 + .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 + .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 + .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc + .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc + .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f + .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f + .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa + .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa + .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc + .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc + .octa 0x76f988da76f988da76f988da76f988da + .octa 0x76f988da76f988da76f988da76f988da + .octa 0x983e5152983e5152983e5152983e5152 + .octa 0x983e5152983e5152983e5152983e5152 + .octa 0xa831c66da831c66da831c66da831c66d + .octa 0xa831c66da831c66da831c66da831c66d + .octa 0xb00327c8b00327c8b00327c8b00327c8 + .octa 0xb00327c8b00327c8b00327c8b00327c8 + .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 + .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 + .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 + .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 + .octa 0xd5a79147d5a79147d5a79147d5a79147 + .octa 0xd5a79147d5a79147d5a79147d5a79147 + .octa 0x06ca635106ca635106ca635106ca6351 + .octa 0x06ca635106ca635106ca635106ca6351 + .octa 0x14292967142929671429296714292967 + .octa 0x14292967142929671429296714292967 + .octa 0x27b70a8527b70a8527b70a8527b70a85 + .octa 0x27b70a8527b70a8527b70a8527b70a85 + .octa 0x2e1b21382e1b21382e1b21382e1b2138 + .octa 0x2e1b21382e1b21382e1b21382e1b2138 + .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc + .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc + 
.octa 0x53380d1353380d1353380d1353380d13 + .octa 0x53380d1353380d1353380d1353380d13 + .octa 0x650a7354650a7354650a7354650a7354 + .octa 0x650a7354650a7354650a7354650a7354 + .octa 0x766a0abb766a0abb766a0abb766a0abb + .octa 0x766a0abb766a0abb766a0abb766a0abb + .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e + .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e + .octa 0x92722c8592722c8592722c8592722c85 + .octa 0x92722c8592722c8592722c8592722c85 + .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 + .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 + .octa 0xa81a664ba81a664ba81a664ba81a664b + .octa 0xa81a664ba81a664ba81a664ba81a664b + .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 + .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 + .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 + .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 + .octa 0xd192e819d192e819d192e819d192e819 + .octa 0xd192e819d192e819d192e819d192e819 + .octa 0xd6990624d6990624d6990624d6990624 + .octa 0xd6990624d6990624d6990624d6990624 + .octa 0xf40e3585f40e3585f40e3585f40e3585 + .octa 0xf40e3585f40e3585f40e3585f40e3585 + .octa 0x106aa070106aa070106aa070106aa070 + .octa 0x106aa070106aa070106aa070106aa070 + .octa 0x19a4c11619a4c11619a4c11619a4c116 + .octa 0x19a4c11619a4c11619a4c11619a4c116 + .octa 0x1e376c081e376c081e376c081e376c08 + .octa 0x1e376c081e376c081e376c081e376c08 + .octa 0x2748774c2748774c2748774c2748774c + .octa 0x2748774c2748774c2748774c2748774c + .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 + .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 + .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 + .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 + .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a + .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a + .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f + .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f + .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 + .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 + .octa 0x748f82ee748f82ee748f82ee748f82ee + .octa 0x748f82ee748f82ee748f82ee748f82ee + .octa 0x78a5636f78a5636f78a5636f78a5636f + .octa 0x78a5636f78a5636f78a5636f78a5636f + .octa 0x84c8781484c8781484c8781484c87814 + .octa 0x84c8781484c8781484c8781484c87814 + .octa 0x8cc702088cc702088cc702088cc70208 + .octa 0x8cc702088cc702088cc702088cc70208 + .octa 0x90befffa90befffa90befffa90befffa + .octa 0x90befffa90befffa90befffa90befffa + .octa 0xa4506ceba4506ceba4506ceba4506ceb + .octa 0xa4506ceba4506ceba4506ceba4506ceb + .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 + .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 + .octa 0xc67178f2c67178f2c67178f2c67178f2 + .octa 0xc67178f2c67178f2c67178f2c67178f2 +PSHUFFLE_BYTE_FLIP_MASK: +.octa 0x0c0d0e0f08090a0b0405060700010203 +.octa 0x0c0d0e0f08090a0b0405060700010203 + +.align 64 +.global K256 +K256: + .int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 
3ae0f43ebd37..9e79baf03a4b 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -427,4 +427,14 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha256-ssse3"); +MODULE_ALIAS_CRYPTO("sha256-avx"); +MODULE_ALIAS_CRYPTO("sha256-avx2"); MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha224-ssse3"); +MODULE_ALIAS_CRYPTO("sha224-avx"); +MODULE_ALIAS_CRYPTO("sha224-avx2"); +#ifdef CONFIG_AS_SHA256_NI +MODULE_ALIAS_CRYPTO("sha256-ni"); +MODULE_ALIAS_CRYPTO("sha224-ni"); +#endif diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile new file mode 100644 index 000000000000..0a57e2103980 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/Makefile @@ -0,0 +1,11 @@ +# +# Arch-specific CryptoAPI modules. +# + +avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ + $(comma)4)$(comma)%ymm2,yes,no) +ifeq ($(avx2_supported),yes) + obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o + sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \ + sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o +endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c new file mode 100644 index 000000000000..f4cf5b78fd36 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -0,0 +1,1046 @@ +/* + * Multi buffer SHA512 algorithm Glue Code + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha.h>
+#include <crypto/mcryptd.h>
+#include <crypto/crypto_wq.h>
+#include <asm/byteorder.h>
+#include <linux/hardirq.h>
+#include <asm/fpu/api.h>
+#include "sha512_mb_ctx.h"
+
+#define FLUSH_INTERVAL 1000 /* in usec */
+
+static struct mcryptd_alg_state sha512_mb_alg_state;
+
+struct sha512_mb_ctx {
+	struct mcryptd_ahash *mcryptd_tfm;
+};
+
+static inline struct mcryptd_hash_request_ctx
+		*cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx)
+{
+	struct ahash_request *areq;
+
+	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
+	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
+}
+
+static inline struct ahash_request
+		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
+{
+	return container_of((void *) ctx, struct ahash_request, __ctx);
+}
+
+static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
+				struct ahash_request *areq)
+{
+	rctx->flag = HASH_UPDATE;
+}
+
+static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state);
+static asmlinkage struct job_sha512* (*sha512_job_mgr_submit)
+						(struct sha512_mb_mgr *state,
+						struct job_sha512 *job);
+static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
+						(struct sha512_mb_mgr *state);
+static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
+						(struct sha512_mb_mgr *state);
+
+inline void sha512_init_digest(uint64_t *digest)
+{
+	static const uint64_t initial_digest[SHA512_DIGEST_LENGTH] = {
+					SHA512_H0, SHA512_H1, SHA512_H2,
+					SHA512_H3, SHA512_H4, SHA512_H5,
+					SHA512_H6, SHA512_H7 };
+	memcpy(digest, initial_digest, sizeof(initial_digest));
+}
+
+inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
+			 uint32_t total_len)
+{
+	uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
+
+	memset(&padblock[i], 0, SHA512_BLOCK_SIZE);
+	padblock[i] = 0x80;
+
+	i += ((SHA512_BLOCK_SIZE - 1) &
+	      (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1)))
+	     + 1 + SHA512_PADLENGTHFIELD_SIZE;
+
+#if SHA512_PADLENGTHFIELD_SIZE == 16
+	*((uint64_t *) &padblock[i - 16]) = 0;
+#endif
+
+	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
+
+	/* Number of extra blocks to hash */
+	return i >> SHA512_LOG2_BLOCK_SIZE;
+}
+
+static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
+		(struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx)
+{
+	while (ctx) {
+		if (ctx->status & HASH_CTX_STS_COMPLETE) {
+			/* Clear PROCESSING bit */
+			ctx->status = HASH_CTX_STS_COMPLETE;
+			return ctx;
+		}
+
+		/*
+		 * If the extra blocks are empty, begin hashing what remains
+		 * in the user's buffer.
+		 */
+		if (ctx->partial_block_buffer_length == 0 &&
+		    ctx->incoming_buffer_length) {
+
+			const void *buffer = ctx->incoming_buffer;
+			uint32_t len = ctx->incoming_buffer_length;
+			uint32_t copy_len;
+
+			/*
+			 * Only entire blocks can be hashed.
+			 * Copy remainder to extra blocks buffer.
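+			 * For example, with len = 300 and a 128-byte
+			 * SHA512_BLOCK_SIZE, copy_len = 300 & 127 = 44,
+			 * and the remaining 256 bytes (two whole blocks)
+			 * are submitted to the job manager below.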
+ */ + copy_len = len & (SHA512_BLOCK_SIZE-1); + + if (copy_len) { + len -= copy_len; + memcpy(ctx->partial_block_buffer, + ((const char *) buffer + len), + copy_len); + ctx->partial_block_buffer_length = copy_len; + } + + ctx->incoming_buffer_length = 0; + + /* len should be a multiple of the block size now */ + assert((len % SHA512_BLOCK_SIZE) == 0); + + /* Set len to the number of blocks to be hashed */ + len >>= SHA512_LOG2_BLOCK_SIZE; + + if (len) { + + ctx->job.buffer = (uint8_t *) buffer; + ctx->job.len = len; + ctx = (struct sha512_hash_ctx *) + sha512_job_mgr_submit(&mgr->mgr, + &ctx->job); + continue; + } + } + + /* + * If the extra blocks are not empty, then we are + * either on the last block(s) or we need more + * user input before continuing. + */ + if (ctx->status & HASH_CTX_STS_LAST) { + + uint8_t *buf = ctx->partial_block_buffer; + uint32_t n_extra_blocks = + sha512_pad(buf, ctx->total_length); + + ctx->status = (HASH_CTX_STS_PROCESSING | + HASH_CTX_STS_COMPLETE); + ctx->job.buffer = buf; + ctx->job.len = (uint32_t) n_extra_blocks; + ctx = (struct sha512_hash_ctx *) + sha512_job_mgr_submit(&mgr->mgr, &ctx->job); + continue; + } + + if (ctx) + ctx->status = HASH_CTX_STS_IDLE; + return ctx; + } + + return NULL; +} + +static struct sha512_hash_ctx + *sha512_ctx_mgr_get_comp_ctx(struct sha512_ctx_mgr *mgr) +{ + /* + * If get_comp_job returns NULL, there are no jobs complete. + * If get_comp_job returns a job, verify that it is safe to return to + * the user. + * If it is not ready, resubmit the job to finish processing. + * If sha512_ctx_mgr_resubmit returned a job, it is ready to be + * returned. + * Otherwise, all jobs currently being managed by the hash_ctx_mgr + * still need processing. + */ + struct sha512_hash_ctx *ctx; + + ctx = (struct sha512_hash_ctx *) + sha512_job_mgr_get_comp_job(&mgr->mgr); + return sha512_ctx_mgr_resubmit(mgr, ctx); +} + +static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr) +{ + sha512_job_mgr_init(&mgr->mgr); +} + +static struct sha512_hash_ctx + *sha512_ctx_mgr_submit(struct sha512_ctx_mgr *mgr, + struct sha512_hash_ctx *ctx, + const void *buffer, + uint32_t len, + int flags) +{ + if (flags & (~HASH_ENTIRE)) { + /* + * User should not pass anything other than FIRST, UPDATE, or + * LAST + */ + ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; + return ctx; + } + + if (ctx->status & HASH_CTX_STS_PROCESSING) { + /* Cannot submit to a currently processing job. */ + ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; + return ctx; + } + + if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + /* Cannot update a finished job. */ + ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; + return ctx; + } + + + if (flags & HASH_FIRST) { + /* Init digest */ + sha512_init_digest(ctx->job.result_digest); + + /* Reset byte counter */ + ctx->total_length = 0; + + /* Clear extra blocks */ + ctx->partial_block_buffer_length = 0; + } + + /* + * If we made it here, there were no errors during this call to + * submit + */ + ctx->error = HASH_CTX_ERROR_NONE; + + /* Store buffer ptr info from user */ + ctx->incoming_buffer = buffer; + ctx->incoming_buffer_length = len; + + /* + * Store the user's request flags and mark this ctx as currently being + * processed. + */ + ctx->status = (flags & HASH_LAST) ? + (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : + HASH_CTX_STS_PROCESSING; + + /* Advance byte counter */ + ctx->total_length += len; + + /* + * If there is anything currently buffered in the extra blocks, + * append to it until it contains a whole block. 
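+	 * (A whole block here is SHA512_BLOCK_SIZE, i.e. 128 bytes.)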
+ * Or if the user's buffer contains less than a whole block, + * append as much as possible to the extra block. + */ + if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) { + /* Compute how many bytes to copy from user buffer into extra + * block + */ + uint32_t copy_len = SHA512_BLOCK_SIZE - + ctx->partial_block_buffer_length; + if (len < copy_len) + copy_len = len; + + if (copy_len) { + /* Copy and update relevant pointers and counters */ + memcpy + (&ctx->partial_block_buffer[ctx->partial_block_buffer_length], + buffer, copy_len); + + ctx->partial_block_buffer_length += copy_len; + ctx->incoming_buffer = (const void *) + ((const char *)buffer + copy_len); + ctx->incoming_buffer_length = len - copy_len; + } + + /* The extra block should never contain more than 1 block + * here + */ + assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE); + + /* If the extra block buffer contains exactly 1 block, it can + * be hashed. + */ + if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) { + ctx->partial_block_buffer_length = 0; + + ctx->job.buffer = ctx->partial_block_buffer; + ctx->job.len = 1; + ctx = (struct sha512_hash_ctx *) + sha512_job_mgr_submit(&mgr->mgr, &ctx->job); + } + } + + return sha512_ctx_mgr_resubmit(mgr, ctx); +} + +static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr) +{ + struct sha512_hash_ctx *ctx; + + while (1) { + ctx = (struct sha512_hash_ctx *) + sha512_job_mgr_flush(&mgr->mgr); + + /* If flush returned 0, there are no more jobs in flight. */ + if (!ctx) + return NULL; + + /* + * If flush returned a job, resubmit the job to finish + * processing. + */ + ctx = sha512_ctx_mgr_resubmit(mgr, ctx); + + /* + * If sha512_ctx_mgr_resubmit returned a job, it is ready to + * be returned. Otherwise, all jobs currently being managed by + * the sha512_ctx_mgr still need processing. Loop. + */ + if (ctx) + return ctx; + } +} + +static int sha512_mb_init(struct ahash_request *areq) +{ + struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); + + hash_ctx_init(sctx); + sctx->job.result_digest[0] = SHA512_H0; + sctx->job.result_digest[1] = SHA512_H1; + sctx->job.result_digest[2] = SHA512_H2; + sctx->job.result_digest[3] = SHA512_H3; + sctx->job.result_digest[4] = SHA512_H4; + sctx->job.result_digest[5] = SHA512_H5; + sctx->job.result_digest[6] = SHA512_H6; + sctx->job.result_digest[7] = SHA512_H7; + sctx->total_length = 0; + sctx->partial_block_buffer_length = 0; + sctx->status = HASH_CTX_STS_IDLE; + + return 0; +} + +static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx) +{ + int i; + struct sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); + __be64 *dst = (__be64 *) rctx->out; + + for (i = 0; i < 8; ++i) + dst[i] = cpu_to_be64(sctx->job.result_digest[i]); + + return 0; +} + +static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, + struct mcryptd_alg_cstate *cstate, bool flush) +{ + int flag = HASH_UPDATE; + int nbytes, err = 0; + struct mcryptd_hash_request_ctx *rctx = *ret_rctx; + struct sha512_hash_ctx *sha_ctx; + + /* more work ? 
*/ + while (!(rctx->flag & HASH_DONE)) { + nbytes = crypto_ahash_walk_done(&rctx->walk, 0); + if (nbytes < 0) { + err = nbytes; + goto out; + } + /* check if the walk is done */ + if (crypto_ahash_walk_last(&rctx->walk)) { + rctx->flag |= HASH_DONE; + if (rctx->flag & HASH_FINAL) + flag |= HASH_LAST; + + } + sha_ctx = (struct sha512_hash_ctx *) + ahash_request_ctx(&rctx->areq); + kernel_fpu_begin(); + sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, + rctx->walk.data, nbytes, flag); + if (!sha_ctx) { + if (flush) + sha_ctx = sha512_ctx_mgr_flush(cstate->mgr); + } + kernel_fpu_end(); + if (sha_ctx) + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + else { + rctx = NULL; + goto out; + } + } + + /* copy the results */ + if (rctx->flag & HASH_FINAL) + sha512_mb_set_results(rctx); + +out: + *ret_rctx = rctx; + return err; +} + +static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, + struct mcryptd_alg_cstate *cstate, + int err) +{ + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha512_hash_ctx *sha_ctx; + struct mcryptd_hash_request_ctx *req_ctx; + int ret; + + /* remove from work list */ + spin_lock(&cstate->work_lock); + list_del(&rctx->waiter); + spin_unlock(&cstate->work_lock); + + if (irqs_disabled()) + rctx->complete(&req->base, err); + else { + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); + } + + /* check to see if there are other jobs that are done */ + sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr); + while (sha_ctx) { + req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&req_ctx, cstate, false); + if (req_ctx) { + spin_lock(&cstate->work_lock); + list_del(&req_ctx->waiter); + spin_unlock(&cstate->work_lock); + + req = cast_mcryptd_ctx_to_req(req_ctx); + if (irqs_disabled()) + rctx->complete(&req->base, ret); + else { + local_bh_disable(); + rctx->complete(&req->base, ret); + local_bh_enable(); + } + } + sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr); + } + + return 0; +} + +static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx, + struct mcryptd_alg_cstate *cstate) +{ + unsigned long next_flush; + unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); + + /* initialize tag */ + rctx->tag.arrival = jiffies; /* tag the arrival time */ + rctx->tag.seq_num = cstate->next_seq_num++; + next_flush = rctx->tag.arrival + delay; + rctx->tag.expire = next_flush; + + spin_lock(&cstate->work_lock); + list_add_tail(&rctx->waiter, &cstate->work_list); + spin_unlock(&cstate->work_lock); + + mcryptd_arm_flusher(cstate, delay); +} + +static int sha512_mb_update(struct ahash_request *areq) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(areq, struct mcryptd_hash_request_ctx, + areq); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha512_mb_alg_state.alg_cstate); + + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha512_hash_ctx *sha_ctx; + int ret = 0, nbytes; + + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, areq); + + nbytes = crypto_ahash_walk_first(req, &rctx->walk); + + if (nbytes < 0) { + ret = nbytes; + goto done; + } + + if (crypto_ahash_walk_last(&rctx->walk)) + rctx->flag |= HASH_DONE; + + /* submit */ + sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); + sha512_mb_add_list(rctx, cstate); + kernel_fpu_begin(); + sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + nbytes, HASH_UPDATE); + 
kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha512_mb_finup(struct ahash_request *areq) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(areq, struct mcryptd_hash_request_ctx, + areq); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha512_mb_alg_state.alg_cstate); + + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha512_hash_ctx *sha_ctx; + int ret = 0, flag = HASH_UPDATE, nbytes; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, areq); + + nbytes = crypto_ahash_walk_first(req, &rctx->walk); + + if (nbytes < 0) { + ret = nbytes; + goto done; + } + + if (crypto_ahash_walk_last(&rctx->walk)) { + rctx->flag |= HASH_DONE; + flag = HASH_LAST; + } + + /* submit */ + rctx->flag |= HASH_FINAL; + sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); + sha512_mb_add_list(rctx, cstate); + + kernel_fpu_begin(); + sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + nbytes, flag); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha512_mb_final(struct ahash_request *areq) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(areq, struct mcryptd_hash_request_ctx, + areq); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha512_mb_alg_state.alg_cstate); + + struct sha512_hash_ctx *sha_ctx; + int ret = 0; + u8 data; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, areq); + + rctx->flag |= HASH_DONE | HASH_FINAL; + + sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); + /* flag HASH_FINAL and 0 data size */ + sha512_mb_add_list(rctx, cstate); + kernel_fpu_begin(); + sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, + HASH_LAST); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha512_mb_export(struct ahash_request *areq, void *out) +{ + struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha512_mb_import(struct ahash_request *areq, const void *in) +{ + struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm) +{ + struct mcryptd_ahash *mcryptd_tfm; + struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); + struct mcryptd_hash_ctx *mctx; 
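+	/* bind to the internal "__intel_sha512-mb" alg through mcryptd */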
+ + mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb", + CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(mcryptd_tfm)) + return PTR_ERR(mcryptd_tfm); + mctx = crypto_ahash_ctx(&mcryptd_tfm->base); + mctx->alg_state = &sha512_mb_alg_state; + ctx->mcryptd_tfm = mcryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&mcryptd_tfm->base)); + + return 0; +} + +static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm) +{ + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + sizeof(struct sha512_hash_ctx)); + + return 0; +} + +static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static struct ahash_alg sha512_mb_areq_alg = { + .init = sha512_mb_init, + .update = sha512_mb_update, + .final = sha512_mb_final, + .finup = sha512_mb_finup, + .export = sha512_mb_export, + .import = sha512_mb_import, + .halg = { + .digestsize = SHA512_DIGEST_SIZE, + .statesize = sizeof(struct sha512_hash_ctx), + .base = { + .cra_name = "__sha512-mb", + .cra_driver_name = "__intel_sha512-mb", + .cra_priority = 100, + /* + * use ASYNC flag as some buffers in multi-buffer + * algo may not have completed before hashing thread + * sleep + */ + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_INTERNAL, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT + (sha512_mb_areq_alg.halg.base.cra_list), + .cra_init = sha512_mb_areq_init_tfm, + .cra_exit = sha512_mb_areq_exit_tfm, + .cra_ctxsize = sizeof(struct sha512_hash_ctx), + } + } +}; + +static int sha512_mb_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_init(mcryptd_req); +} + +static int sha512_mb_async_update(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_update(mcryptd_req); +} + +static int sha512_mb_async_finup(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_finup(mcryptd_req); +} + +static int sha512_mb_async_final(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + 
ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_final(mcryptd_req); +} + +static int sha512_mb_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_digest(mcryptd_req); +} + +static int sha512_mb_async_export(struct ahash_request *req, void *out) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_export(mcryptd_req, out); +} + +static int sha512_mb_async_import(struct ahash_request *req, const void *in) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); + struct mcryptd_hash_request_ctx *rctx; + struct ahash_request *areq; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + rctx = ahash_request_ctx(mcryptd_req); + + areq = &rctx->areq; + + ahash_request_set_tfm(areq, child); + ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, + rctx->complete, req); + + return crypto_ahash_import(mcryptd_req, in); +} + +static struct ahash_alg sha512_mb_async_alg = { + .init = sha512_mb_async_init, + .update = sha512_mb_async_update, + .final = sha512_mb_async_final, + .finup = sha512_mb_async_finup, + .digest = sha512_mb_async_digest, + .export = sha512_mb_async_export, + .import = sha512_mb_async_import, + .halg = { + .digestsize = SHA512_DIGEST_SIZE, + .statesize = sizeof(struct sha512_hash_ctx), + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512_mb", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_type = &crypto_ahash_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT + (sha512_mb_async_alg.halg.base.cra_list), + .cra_init = sha512_mb_async_init_tfm, + .cra_exit = sha512_mb_async_exit_tfm, + .cra_ctxsize = sizeof(struct sha512_mb_ctx), + .cra_alignmask = 0, + }, + }, +}; + +static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate) +{ + struct mcryptd_hash_request_ctx *rctx; + unsigned long cur_time; + unsigned long next_flush = 0; + struct sha512_hash_ctx *sha_ctx; + + + cur_time = jiffies; + + while (!list_empty(&cstate->work_list)) { + rctx = list_entry(cstate->work_list.next, + struct mcryptd_hash_request_ctx, waiter); + if time_before(cur_time, rctx->tag.expire) + break; + kernel_fpu_begin(); + sha_ctx = (struct sha512_hash_ctx *) + sha512_ctx_mgr_flush(cstate->mgr); + kernel_fpu_end(); + if (!sha_ctx) { + pr_err("sha512_mb error: nothing got flushed for" + " non-empty list\n"); + break; + } + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + sha_finish_walk(&rctx, cstate, true); + sha_complete_job(rctx, cstate, 0); + } + + if (!list_empty(&cstate->work_list)) { + rctx = list_entry(cstate->work_list.next, + struct 
mcryptd_hash_request_ctx, waiter); + /* get the hash context and then flush time */ + next_flush = rctx->tag.expire; + mcryptd_arm_flusher(cstate, get_delay(next_flush)); + } + return next_flush; +} + +static int __init sha512_mb_mod_init(void) +{ + + int cpu; + int err; + struct mcryptd_alg_cstate *cpu_state; + + /* check for dependent cpu features */ + if (!boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_BMI2)) + return -ENODEV; + + /* initialize multibuffer structures */ + sha512_mb_alg_state.alg_cstate = + alloc_percpu(struct mcryptd_alg_cstate); + + sha512_job_mgr_init = sha512_mb_mgr_init_avx2; + sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2; + sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2; + sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2; + + if (!sha512_mb_alg_state.alg_cstate) + return -ENOMEM; + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); + cpu_state->next_flush = 0; + cpu_state->next_seq_num = 0; + cpu_state->flusher_engaged = false; + INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); + cpu_state->cpu = cpu; + cpu_state->alg_state = &sha512_mb_alg_state; + cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr), + GFP_KERNEL); + if (!cpu_state->mgr) + goto err2; + sha512_ctx_mgr_init(cpu_state->mgr); + INIT_LIST_HEAD(&cpu_state->work_list); + spin_lock_init(&cpu_state->work_lock); + } + sha512_mb_alg_state.flusher = &sha512_mb_flusher; + + err = crypto_register_ahash(&sha512_mb_areq_alg); + if (err) + goto err2; + err = crypto_register_ahash(&sha512_mb_async_alg); + if (err) + goto err1; + + + return 0; +err1: + crypto_unregister_ahash(&sha512_mb_areq_alg); +err2: + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); + kfree(cpu_state->mgr); + } + free_percpu(sha512_mb_alg_state.alg_cstate); + return -ENODEV; +} + +static void __exit sha512_mb_mod_fini(void) +{ + int cpu; + struct mcryptd_alg_cstate *cpu_state; + + crypto_unregister_ahash(&sha512_mb_async_alg); + crypto_unregister_ahash(&sha512_mb_areq_alg); + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); + kfree(cpu_state->mgr); + } + free_percpu(sha512_mb_alg_state.alg_cstate); +} + +module_init(sha512_mb_mod_init); +module_exit(sha512_mb_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated"); + +MODULE_ALIAS("sha512"); diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h new file mode 100644 index 000000000000..9d4b2c8208d5 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h @@ -0,0 +1,130 @@ +/* + * Header file for multi buffer SHA512 context + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SHA_MB_CTX_INTERNAL_H +#define _SHA_MB_CTX_INTERNAL_H + +#include "sha512_mb_mgr.h" + +#define HASH_UPDATE 0x00 +#define HASH_FIRST 0x01 +#define HASH_LAST 0x02 +#define HASH_ENTIRE 0x03 +#define HASH_DONE 0x04 +#define HASH_FINAL 0x08 + +#define HASH_CTX_STS_IDLE 0x00 +#define HASH_CTX_STS_PROCESSING 0x01 +#define HASH_CTX_STS_LAST 0x02 +#define HASH_CTX_STS_COMPLETE 0x04 + +enum hash_ctx_error { + HASH_CTX_ERROR_NONE = 0, + HASH_CTX_ERROR_INVALID_FLAGS = -1, + HASH_CTX_ERROR_ALREADY_PROCESSING = -2, + HASH_CTX_ERROR_ALREADY_COMPLETED = -3, +}; + +#define hash_ctx_user_data(ctx) ((ctx)->user_data) +#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) +#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) +#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) +#define hash_ctx_status(ctx) ((ctx)->status) +#define hash_ctx_error(ctx) ((ctx)->error) +#define hash_ctx_init(ctx) \ + do { \ + (ctx)->error = HASH_CTX_ERROR_NONE; \ + (ctx)->status = HASH_CTX_STS_COMPLETE; \ + } while (0) + +/* Hash Constants and Typedefs */ +#define SHA512_DIGEST_LENGTH 8 +#define SHA512_LOG2_BLOCK_SIZE 7 + +#define SHA512_PADLENGTHFIELD_SIZE 16 + +#ifdef SHA_MB_DEBUG +#define assert(expr) \ +do { \ + if (unlikely(!(expr))) { \ + printk(KERN_ERR "Assertion failed! 
%s,%s,%s,line=%d\n", \ + #expr, __FILE__, __func__, __LINE__); \ + } \ +} while (0) +#else +#define assert(expr) do {} while (0) +#endif + +struct sha512_ctx_mgr { + struct sha512_mb_mgr mgr; +}; + +/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */ + +struct sha512_hash_ctx { + /* Must be at struct offset 0 */ + struct job_sha512 job; + /* status flag */ + int status; + /* error flag */ + int error; + + uint32_t total_length; + const void *incoming_buffer; + uint32_t incoming_buffer_length; + uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2]; + uint32_t partial_block_buffer_length; + void *user_data; +}; + +#endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h new file mode 100644 index 000000000000..178f17eef382 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h @@ -0,0 +1,104 @@ +/* + * Header file for multi buffer SHA512 algorithm manager + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef __SHA_MB_MGR_H
+#define __SHA_MB_MGR_H
+
+#include <linux/types.h>
+
+#define NUM_SHA512_DIGEST_WORDS 8
+
+enum job_sts {STS_UNKNOWN = 0,
+	STS_BEING_PROCESSED = 1,
+	STS_COMPLETED = 2,
+	STS_INTERNAL_ERROR = 3,
+	STS_ERROR = 4
+};
+
+struct job_sha512 {
+	u8 *buffer;
+	u64 len;
+	u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
+	enum job_sts status;
+	void *user_data;
+};
+
+struct sha512_args_x4 {
+	uint64_t digest[8][4];
+	uint8_t *data_ptr[4];
+};
+
+struct sha512_lane_data {
+	struct job_sha512 *job_in_lane;
+};
+
+struct sha512_mb_mgr {
+	struct sha512_args_x4 args;
+
+	uint64_t lens[4];
+
+	/* each byte is index (0...3) of an unused lane */
+	uint64_t unused_lanes;
+	/* byte 4 is set to FF as a flag */
+	struct sha512_lane_data ldata[4];
+};
+
+#define SHA512_MB_MGR_NUM_LANES_AVX2 4
+
+void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
+struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
+					     struct job_sha512 *job);
+struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
+struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
+
+#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
new file mode 100644
index 000000000000..cf2636d4c9ba
--- /dev/null
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
@@ -0,0 +1,281 @@
+/*
+ * Header file for multi buffer SHA512 algorithm data structure
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+# Macros for defining data structures
+
+# Usage example
+
+#START_FIELDS   # JOB_AES
+###     name            size    align
+#FIELD  _plaintext,     8,      8       # pointer to plaintext
+#FIELD  _ciphertext,    8,      8       # pointer to ciphertext
+#FIELD  _IV,            16,     8       # IV
+#FIELD  _keys,          8,      8       # pointer to keys
+#FIELD  _len,           4,      4       # length in bytes
+#FIELD  _status,        4,      4       # status enumeration
+#FIELD  _user_data,     8,      8       # pointer to user data
+#UNION  _union,         size1,  align1, \
+#                       size2,  align2, \
+#                       size3,  align3, \
+#                       ...
+#END_FIELDS
+#%assign _JOB_AES_size  _FIELD_OFFSET
+#%assign _JOB_AES_align _STRUCT_ALIGN
+
+#########################################################################
+
+# Alternate "struc-like" syntax:
+#      STRUCT job_aes2
+#      RES_Q   .plaintext,     1
+#      RES_Q   .ciphertext,    1
+#      RES_DQ  .IV,            1
+#      RES_B   .nested,        _JOB_AES_SIZE, _JOB_AES_ALIGN
+#      RES_U   .union,         size1, align1, \
+#                              size2, align2, \
+#                              ...
+#      ENDSTRUCT
+#      # Following only needed if nesting
+#      %assign job_aes2_size _FIELD_OFFSET
+#      %assign job_aes2_align _STRUCT_ALIGN
+#
+# RES_* macros take a name, a count and an optional alignment.
+# The count is in terms of the base size of the macro, and the
+# default alignment is the base size.
+# The macros are:
+# Macro    Base size
+# RES_B     1
+# RES_W     2
+# RES_D     4
+# RES_Q     8
+# RES_DQ   16
+# RES_Y    32
+# RES_Z    64
+#
+# RES_U defines a union. Its arguments are a name and two or more
+# pairs of "size, alignment"
+#
+# The two assigns are only needed if this structure is being nested
+# within another. Even if the assigns are not done, one can still use
+# STRUCT_NAME_size as the size of the structure.
+#
+# Note that for nesting, you still need to assign to STRUCT_NAME_size.
+#
+# The differences between this and using "struc" directly are that each
+# type is implicitly aligned to its natural length (although this can be
+# over-ridden with an explicit third parameter), and that the structure
+# is padded at the end to its overall alignment.
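+#
+# For example, tracing a simplified subset of the MB_MGR layout defined at
+# the end of this file through the FIELD macros (a sketch, not an extra
+# definition):
+#
+#      START_FIELDS                    # _FIELD_OFFSET = 0, _STRUCT_ALIGN = 0
+#      FIELD   _lens, 8*4, 8           # _lens = 0,  _FIELD_OFFSET = 32
+#      FIELD   _unused_lanes, 8, 8     # _unused_lanes = 32, _FIELD_OFFSET = 40
+#      END_FIELDS                      # size rounded up to _STRUCT_ALIGN (8) -> 40
+#
+# after which the enclosing structure takes its _size/_align symbols from
+# _FIELD_OFFSET and _STRUCT_ALIGN.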
+#
+
+#########################################################################
+
+#ifndef _DATASTRUCT_ASM_
+#define _DATASTRUCT_ASM_
+
+#define PTR_SZ 8
+#define SHA512_DIGEST_WORD_SIZE 8
+#define SHA512_MB_MGR_NUM_LANES_AVX2 4
+#define NUM_SHA512_DIGEST_WORDS 8
+#define SZ4 4*SHA512_DIGEST_WORD_SIZE
+#define ROUNDS 80*SZ4
+#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
+
+# START_FIELDS
+.macro START_FIELDS
+ _FIELD_OFFSET = 0
+ _STRUCT_ALIGN = 0
+.endm
+
+# FIELD name size align
+.macro FIELD name size align
+ _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
+ \name = _FIELD_OFFSET
+ _FIELD_OFFSET = _FIELD_OFFSET + (\size)
+.if (\align > _STRUCT_ALIGN)
+ _STRUCT_ALIGN = \align
+.endif
+.endm
+
+# END_FIELDS
+.macro END_FIELDS
+ _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
+.endm
+
+.macro STRUCT p1
+START_FIELDS
+.struc \p1
+.endm
+
+.macro ENDSTRUCT
+ tmp = _FIELD_OFFSET
+ END_FIELDS
+ tmp = (_FIELD_OFFSET - ##tmp)
+.if (tmp > 0)
+ .lcomm tmp
+.endif
+.endm
+
+## RES_int name size align
+.macro RES_int p1 p2 p3
+ name = \p1
+ size = \p2
+ align = .\p3
+
+ _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
+.align align
+.lcomm name size
+ _FIELD_OFFSET = _FIELD_OFFSET + (size)
+.if (align > _STRUCT_ALIGN)
+ _STRUCT_ALIGN = align
+.endif
+.endm
+
+# macro RES_B name, size [, align]
+.macro RES_B _name, _size, _align=1
+RES_int _name _size _align
+.endm
+
+# macro RES_W name, size [, align]
+.macro RES_W _name, _size, _align=2
+RES_int _name 2*(_size) _align
+.endm
+
+# macro RES_D name, size [, align]
+.macro RES_D _name, _size, _align=4
+RES_int _name 4*(_size) _align
+.endm
+
+# macro RES_Q name, size [, align]
+.macro RES_Q _name, _size, _align=8
+RES_int _name 8*(_size) _align
+.endm
+
+# macro RES_DQ name, size [, align]
+.macro RES_DQ _name, _size, _align=16
+RES_int _name 16*(_size) _align
+.endm
+
+# macro RES_Y name, size [, align]
+.macro RES_Y _name, _size, _align=32
+RES_int _name 32*(_size) _align
+.endm
+
+# macro RES_Z name, size [, align]
+.macro RES_Z _name, _size, _align=64
+RES_int _name 64*(_size) _align
+.endm
+
+#endif
+
+###################################################################
+### Define SHA512 Out Of Order Data Structures
+###################################################################
+
+START_FIELDS    # LANE_DATA
+###     name            size    align
+FIELD   _job_in_lane,   8,      8       # pointer to job object
+END_FIELDS
+
+ _LANE_DATA_size = _FIELD_OFFSET
+ _LANE_DATA_align = _STRUCT_ALIGN
+
+####################################################################
+
+START_FIELDS    # SHA512_ARGS_X4
+###     name            size    align
+FIELD   _digest,        8*8*4,  4       # transposed digest
+FIELD   _data_ptr,      8*4,    8       # array of pointers to data
+END_FIELDS
+
+ _SHA512_ARGS_X4_size = _FIELD_OFFSET
+ _SHA512_ARGS_X4_align = _STRUCT_ALIGN
+
+#####################################################################
+
+START_FIELDS    # MB_MGR
+###     name            size    align
+FIELD   _args,          _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
+FIELD   _lens,          8*4,    8
+FIELD   _unused_lanes,  8,      8
+FIELD   _ldata,         _LANE_DATA_size*4, _LANE_DATA_align
+END_FIELDS
+
+ _MB_MGR_size = _FIELD_OFFSET
+ _MB_MGR_align = _STRUCT_ALIGN
+
+_args_digest = _args + _digest
+_args_data_ptr = _args + _data_ptr
+
+#######################################################################
+
+#######################################################################
+#### Define constants
+#######################################################################
+
+#define STS_UNKNOWN             0
+#define STS_BEING_PROCESSED     1
+#define STS_COMPLETED           2
+
+#######################################################################
+#### Define JOB_SHA512 structure
+#######################################################################
+
+START_FIELDS    # JOB_SHA512
+###     name                            size    align
+FIELD   _buffer,                        8,      8       # pointer to buffer
+FIELD   _len,                           8,      8       # length in bytes
+FIELD   _result_digest,                 8*8,    32      # Digest (output)
+FIELD   _status,                        4,      4
+FIELD   _user_data,                     8,      8
+END_FIELDS
+
+ _JOB_SHA512_size = _FIELD_OFFSET
+ _JOB_SHA512_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
new file mode 100644
index 000000000000..3ddba19a0db6
--- /dev/null
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
@@ -0,0 +1,291 @@
+/*
+ * Flush routine for SHA512 multibuffer
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+#include "sha512_mb_mgr_datastruct.S"
+
+.extern sha512_x4_avx2
+
+# LINUX register definitions
+#define arg1    %rdi
+#define arg2    %rsi
+
+# idx needs to be other than arg1, arg2, rbx, r12
+#define idx     %rdx
+
+# Common definitions
+#define state   arg1
+#define job     arg2
+#define len2    arg2
+
+#define unused_lanes    %rbx
+#define lane_data       %rbx
+#define tmp2            %rbx
+
+#define job_rax         %rax
+#define tmp1            %rax
+#define size_offset     %rax
+#define tmp             %rax
+#define start_offset    %rax
+
+#define tmp3            arg1
+
+#define extra_blocks    arg2
+#define p               arg2
+
+#define tmp4            %r8
+#define lens0           %r8
+
+#define lens1           %r9
+#define lens2           %r10
+#define lens3           %r11
+
+.macro LABEL prefix n
+\prefix\n\():
+.endm
+
+.macro JNE_SKIP i
+jne     skip_\i
+.endm
+
+.altmacro
+.macro SET_OFFSET _offset
+offset = \_offset
+.endm
+.noaltmacro
+
+# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
+# arg 1 : rdi : state
+ENTRY(sha512_mb_mgr_flush_avx2)
+        FRAME_BEGIN
+        push    %rbx
+
+        # If bit (32+7) is set, then all lanes are empty
+        mov     _unused_lanes(state), unused_lanes
+        bt      $32+7, unused_lanes
+        jc      return_null
+
+        # find a lane with a non-null job
+        xor     idx, idx
+        offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
+        cmpq    $0, offset(state)
+        cmovne  one(%rip), idx
+        offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
+        cmpq    $0, offset(state)
+        cmovne  two(%rip), idx
+        offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
+        cmpq    $0, offset(state)
+        cmovne  three(%rip), idx
+
+        # copy idx to empty lanes
+copy_lane_data:
+        offset = (_args + _data_ptr)
+        mov     offset(state,idx,8), tmp
+
+        I = 0
+.rep 4
+        offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
+        cmpq    $0, offset(state)
+.altmacro
+        JNE_SKIP %I
+        offset = (_args + _data_ptr + 8*I)
+        mov     tmp, offset(state)
+        offset = (_lens + 8*I +4)
+        movl    $0xFFFFFFFF, offset(state)
+LABEL skip_ %I
+        I = (I+1)
+.noaltmacro
+.endr
+
+        # Find min length
+        mov     _lens + 0*8(state),lens0
+        mov     lens0,idx
+        mov     _lens + 1*8(state),lens1
+        cmp     idx,lens1
+        cmovb   lens1,idx
+        mov     _lens + 2*8(state),lens2
+        cmp     idx,lens2
+        cmovb   lens2,idx
+        mov     _lens + 3*8(state),lens3
+        cmp     idx,lens3
+        cmovb   lens3,idx
+        mov     idx,len2
+        and     $0xF,idx
+        and     $~0xFF,len2
+        jz      len_is_0
+
+        sub     len2, lens0
+        sub     len2, lens1
+        sub     len2, lens2
+        sub     len2, lens3
+        shr     $32,len2
+        mov     lens0, _lens + 0*8(state)
+        mov     lens1, _lens + 1*8(state)
+        mov     lens2, _lens + 2*8(state)
+        mov     lens3, _lens + 3*8(state)
+
+        # "state" and "args" are the same address, arg1
+        # len is arg2
+        call    sha512_x4_avx2
+        # state and idx are intact
+
+len_is_0:
+        # process completed job "idx"
+        imul    $_LANE_DATA_size, idx, lane_data
+        lea     _ldata(state, lane_data), lane_data
+
+        mov     _job_in_lane(lane_data), job_rax
+        movq    $0, _job_in_lane(lane_data)
+        movl    $STS_COMPLETED, _status(job_rax)
+        mov     _unused_lanes(state), unused_lanes
+        shl     $8, unused_lanes
+        or      idx, unused_lanes
+        mov     unused_lanes, _unused_lanes(state)
+
+        movl    $0xFFFFFFFF, _lens+4(state, idx, 8)
+
+        vmovq   _args_digest+0*32(state, idx, 8), %xmm0
+        vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
+        vmovq   _args_digest+2*32(state, idx, 8), %xmm1
+        vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
+        vmovq   _args_digest+4*32(state, idx, 8), %xmm2
+        vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
+        vmovq   _args_digest+6*32(state, idx, 8), %xmm3
+        vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
+
+        vmovdqu %xmm0, _result_digest(job_rax)
+        vmovdqu %xmm1, _result_digest+1*16(job_rax)
+        vmovdqu %xmm2, _result_digest+2*16(job_rax)
+        vmovdqu %xmm3, _result_digest+3*16(job_rax)
+
+return:
+        pop     %rbx
+        FRAME_END
+        ret
+
+return_null:
+        xor     job_rax, job_rax
+        jmp     return
+ENDPROC(sha512_mb_mgr_flush_avx2)
+.align 16
+
+ENTRY(sha512_mb_mgr_get_comp_job_avx2)
+        push    %rbx
+
+        mov     _unused_lanes(state), unused_lanes
+        bt      $(32+7), unused_lanes
+        jc      .return_null
+
+        # Find min length
+        mov     _lens(state),lens0
+        mov     lens0,idx
+        mov     _lens+1*8(state),lens1
+        cmp     idx,lens1
+        cmovb   lens1,idx
+        mov     _lens+2*8(state),lens2
+        cmp     idx,lens2
+        cmovb   lens2,idx
+        mov     _lens+3*8(state),lens3
+        cmp     idx,lens3
+        cmovb   lens3,idx
+        test    $~0xF,idx
+        jnz     .return_null
+        and     $0xF,idx
+
+        # process completed job "idx"
+        imul    $_LANE_DATA_size, idx, lane_data
+        lea     _ldata(state, lane_data), lane_data
+
+        mov     _job_in_lane(lane_data), job_rax
+        movq    $0, _job_in_lane(lane_data)
+        movl    $STS_COMPLETED, _status(job_rax)
+        mov     _unused_lanes(state), unused_lanes
+        shl     $8, unused_lanes
+        or      idx, unused_lanes
+        mov     unused_lanes, _unused_lanes(state)
+
+        movl    $0xFFFFFFFF, _lens+4(state, idx, 8)
+
+        vmovq   _args_digest(state, idx, 8), %xmm0
+        vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
+        vmovq   _args_digest+2*32(state, idx, 8), %xmm1
+        vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
+        vmovq   _args_digest+4*32(state, idx, 8), %xmm2
+        vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
+        vmovq   _args_digest+6*32(state, idx, 8), %xmm3
+        vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
+
+        vmovdqu %xmm0, _result_digest+0*16(job_rax)
+        vmovdqu %xmm1, _result_digest+1*16(job_rax)
+        vmovdqu %xmm2, _result_digest+2*16(job_rax)
+        vmovdqu %xmm3, _result_digest+3*16(job_rax)
+
+        pop     %rbx
+
+        ret
+
+.return_null:
+        xor     job_rax, job_rax
+        pop     %rbx
+        ret
+ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
+.data
+
+.align 16
+one:
+.quad  1
+two:
+.quad  2
+three:
+.quad  3
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
new file mode 100644
index 000000000000..36870b26067a
--- /dev/null
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
@@ -0,0 +1,67 @@
+/*
+ * Initialization code for multi buffer SHA512 algorithm for AVX2
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sha512_mb_mgr.h" + +void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) +{ + unsigned int j; + + state->lens[0] = 0; + state->lens[1] = 1; + state->lens[2] = 2; + state->lens[3] = 3; + state->unused_lanes = 0xFF03020100; + for (j = 0; j < 4; j++) + state->ldata[j].job_in_lane = NULL; +} diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S new file mode 100644 index 000000000000..815f07bdd1f8 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S @@ -0,0 +1,222 @@ +/* + * Buffer submit code for multi buffer SHA512 algorithm + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "sha512_mb_mgr_datastruct.S" + +.extern sha512_x4_avx2 + +#define arg1 %rdi +#define arg2 %rsi + +#define idx %rdx +#define last_len %rdx + +#define size_offset %rcx +#define tmp2 %rcx + +# Common definitions +#define state arg1 +#define job arg2 +#define len2 arg2 +#define p2 arg2 + +#define p %r11 +#define start_offset %r11 + +#define unused_lanes %rbx + +#define job_rax %rax +#define len %rax + +#define lane %r12 +#define tmp3 %r12 +#define lens3 %r12 + +#define extra_blocks %r8 +#define lens0 %r8 + +#define tmp %r9 +#define lens1 %r9 + +#define lane_data %r10 +#define lens2 %r10 + +#define DWORD_len %eax + +# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job) +# arg 1 : rcx : state +# arg 2 : rdx : job +ENTRY(sha512_mb_mgr_submit_avx2) + FRAME_BEGIN + push %rbx + push %r12 + + mov _unused_lanes(state), unused_lanes + movzb %bl,lane + shr $8, unused_lanes + imul $_LANE_DATA_size, lane,lane_data + movl $STS_BEING_PROCESSED, _status(job) + lea _ldata(state, lane_data), lane_data + mov unused_lanes, _unused_lanes(state) + movl _len(job), DWORD_len + + mov job, _job_in_lane(lane_data) + movl DWORD_len,_lens+4(state , lane, 8) + + # Load digest words from result_digest + vmovdqu _result_digest+0*16(job), %xmm0 + vmovdqu _result_digest+1*16(job), %xmm1 + vmovdqu _result_digest+2*16(job), %xmm2 + vmovdqu _result_digest+3*16(job), %xmm3 + + vmovq %xmm0, _args_digest(state, lane, 8) + vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8) + vmovq %xmm1, _args_digest+2*32(state , lane, 8) + vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8) + vmovq %xmm2, _args_digest+4*32(state , lane, 8) + vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8) + vmovq %xmm3, _args_digest+6*32(state , lane, 8) + vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8) + + mov _buffer(job), p + mov p, _args_data_ptr(state, lane, 8) + + cmp $0xFF, unused_lanes + jne return_null + +start_loop: + + # Find min length + mov _lens+0*8(state),lens0 + mov lens0,idx + mov _lens+1*8(state),lens1 + cmp idx,lens1 + cmovb lens1, idx + mov _lens+2*8(state),lens2 + cmp idx,lens2 + cmovb lens2,idx + mov _lens+3*8(state),lens3 + cmp idx,lens3 + cmovb lens3,idx + mov idx,len2 + and $0xF,idx + and $~0xFF,len2 + jz len_is_0 + + sub len2,lens0 + sub len2,lens1 + sub len2,lens2 + sub len2,lens3 + shr $32,len2 + mov lens0, _lens + 0*8(state) + mov lens1, _lens + 1*8(state) + mov lens2, _lens + 2*8(state) + mov lens3, _lens + 3*8(state) + + # "state" and "args" are the same address, arg1 + # len is arg2 + call sha512_x4_avx2 + # state and idx are intact + +len_is_0: + + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + mov _unused_lanes(state), unused_lanes + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + shl $8, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF,_lens+4(state,idx,8) + 
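+        # The digest words sit transposed in the manager: 64-bit word i of
+        # lane "idx" lives at _args_digest + i*SHA512_DIGEST_ROW_SIZE + idx*8,
+        # so each vmovq/vpinsrq pair below gathers two consecutive digest
+        # words of the completed lane back into a linear layout.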
vmovq _args_digest+0*32(state , idx, 8), %xmm0 + vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0 + vmovq _args_digest+2*32(state , idx, 8), %xmm1 + vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1 + vmovq _args_digest+4*32(state , idx, 8), %xmm2 + vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2 + vmovq _args_digest+6*32(state , idx, 8), %xmm3 + vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3 + + vmovdqu %xmm0, _result_digest + 0*16(job_rax) + vmovdqu %xmm1, _result_digest + 1*16(job_rax) + vmovdqu %xmm2, _result_digest + 2*16(job_rax) + vmovdqu %xmm3, _result_digest + 3*16(job_rax) + +return: + pop %r12 + pop %rbx + FRAME_END + ret + +return_null: + xor job_rax, job_rax + jmp return +ENDPROC(sha512_mb_mgr_submit_avx2) +.data + +.align 16 +H0: .int 0x6a09e667 +H1: .int 0xbb67ae85 +H2: .int 0x3c6ef372 +H3: .int 0xa54ff53a +H4: .int 0x510e527f +H5: .int 0x9b05688c +H6: .int 0x1f83d9ab +H7: .int 0x5be0cd19 diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S new file mode 100644 index 000000000000..31ab1eff6413 --- /dev/null +++ b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S @@ -0,0 +1,529 @@ +/* + * Multi-buffer SHA512 algorithm hash compute routine + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Megha Dey + * + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +# code to compute quad SHA512 using AVX2 +# use YMMs to tackle the larger digest size +# outer calling routine takes care of save and restore of XMM registers +# Logic designed/laid out by JDG + +# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15 +# Stack must be aligned to 32 bytes before call +# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12 +# Linux preserves: rcx rdx rdi rbp r13 r14 r15 +# clobbers ymm0-15 + +#include +#include "sha512_mb_mgr_datastruct.S" + +arg1 = %rdi +arg2 = %rsi + +# Common definitions +STATE = arg1 +INP_SIZE = arg2 + +IDX = %rax +ROUND = %rbx +TBL = %r8 + +inp0 = %r9 +inp1 = %r10 +inp2 = %r11 +inp3 = %r12 + +a = %ymm0 +b = %ymm1 +c = %ymm2 +d = %ymm3 +e = %ymm4 +f = %ymm5 +g = %ymm6 +h = %ymm7 + +a0 = %ymm8 +a1 = %ymm9 +a2 = %ymm10 + +TT0 = %ymm14 +TT1 = %ymm13 +TT2 = %ymm12 +TT3 = %ymm11 +TT4 = %ymm10 +TT5 = %ymm9 + +T1 = %ymm14 +TMP = %ymm15 + +# Define stack usage +STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24 + +#define VMOVPD vmovupd +_digest = SZ4*16 + +# transpose r0, r1, r2, r3, t0, t1 +# "transpose" data in {r0..r3} using temps {t0..t3} +# Input looks like: {r0 r1 r2 r3} +# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} +# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} +# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} +# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} +# +# output looks like: {t0 r1 r0 r3} +# t0 = {d1 d0 c1 c0 b1 b0 a1 a0} +# r1 = {d3 d2 c3 c2 b3 b2 a3 a2} +# r0 = {d5 d4 c5 c4 b5 b4 a5 a4} +# r3 = {d7 d6 c7 c6 b7 b6 a7 a6} + +.macro TRANSPOSE r0 r1 r2 r3 t0 t1 + vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} + vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} + vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} + vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} + + vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6 + vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2 + vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5 + vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1 +.endm + +.macro ROTATE_ARGS +TMP_ = h +h = g +g = f +f = e +e = d +d = c +c = b +b = a +a = TMP_ +.endm + +# PRORQ reg, imm, tmp +# packed-rotate-right-double +# does a rotate by doing two shifts and an or +.macro _PRORQ reg imm tmp + vpsllq $(64-\imm),\reg,\tmp + vpsrlq $\imm,\reg, \reg + vpor \tmp,\reg, \reg +.endm + +# non-destructive +# PRORQ_nd reg, imm, tmp, src +.macro _PRORQ_nd reg imm tmp src + vpsllq $(64-\imm), \src, \tmp + vpsrlq $\imm, \src, \reg + vpor \tmp, \reg, \reg +.endm + +# PRORQ dst/src, amt +.macro PRORQ reg imm + _PRORQ \reg, \imm, TMP +.endm + +# PRORQ_nd dst, src, amt +.macro PRORQ_nd reg tmp imm + _PRORQ_nd \reg, \imm, TMP, \tmp +.endm + +#; arguments passed implicitly in preprocessor symbols i, a...h +.macro ROUND_00_15 _T1 i + PRORQ_nd a0, e, (18-14) # sig1: a0 = (e >> 4) + + vpxor g, f, a2 # ch: a2 = f^g + vpand e,a2, a2 # ch: a2 = (f^g)&e + vpxor g, a2, a2 # a2 = ch + + PRORQ_nd a1,e,41 # sig1: a1 = (e >> 25) + + offset = SZ4*(\i & 0xf) + vmovdqu \_T1,offset(%rsp) + vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K + vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) + PRORQ a0, 14 # sig1: a0 = (e >> 6) ^ (e >> 11) + vpaddq a2, h, h # h = h + ch + PRORQ_nd a2,a,6 # sig0: a2 = (a >> 11) + vpaddq \_T1,h, h # h = h + ch + W + K + vpxor a1, a0, a0 # a0 = sigma1 + vmovdqu a,\_T1 + PRORQ_nd a1,a,39 # sig0: a1 = (a >> 22) + vpxor c, \_T1, \_T1 # maj: T1 = a^c + add $SZ4, ROUND # ROUND++ + vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b + vpaddq a0, h, h + vpaddq h, d, d + vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) + PRORQ a2,28 # sig0: a2 = (a >> 2) ^ (a >> 13) + vpxor a1, a2, a2 
# a2 = sig0 + vpand c, a, a1 # maj: a1 = a&c + vpor \_T1, a1, a1 # a1 = maj + vpaddq a1, h, h # h = h + ch + W + K + maj + vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0 + ROTATE_ARGS +.endm + + +#; arguments passed implicitly in preprocessor symbols i, a...h +.macro ROUND_16_XX _T1 i + vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1 + vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1 + vmovdqu \_T1, a0 + PRORQ \_T1,7 + vmovdqu a1, a2 + PRORQ a1,42 + vpxor a0, \_T1, \_T1 + PRORQ \_T1, 1 + vpxor a2, a1, a1 + PRORQ a1, 19 + vpsrlq $7, a0, a0 + vpxor a0, \_T1, \_T1 + vpsrlq $6, a2, a2 + vpxor a2, a1, a1 + vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1 + vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1 + vpaddq a1, \_T1, \_T1 + + ROUND_00_15 \_T1,\i +.endm + + +# void sha512_x4_avx2(void *STATE, const int INP_SIZE) +# arg 1 : STATE : pointer to input data +# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1) +ENTRY(sha512_x4_avx2) + # general registers preserved in outer calling routine + # outer calling routine saves all the XMM registers + # save callee-saved clobbered registers to comply with C function ABI + push %r12 + push %r13 + push %r14 + push %r15 + + sub $STACK_SPACE1, %rsp + + # Load the pre-transposed incoming digest. + vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a + vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b + vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c + vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d + vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e + vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f + vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g + vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h + + lea K512_4(%rip),TBL + + # load the address of each of the 4 message lanes + # getting ready to transpose input onto stack + mov _data_ptr+0*PTR_SZ(STATE),inp0 + mov _data_ptr+1*PTR_SZ(STATE),inp1 + mov _data_ptr+2*PTR_SZ(STATE),inp2 + mov _data_ptr+3*PTR_SZ(STATE),inp3 + + xor IDX, IDX +lloop: + xor ROUND, ROUND + + # save old digest + vmovdqu a, _digest(%rsp) + vmovdqu b, _digest+1*SZ4(%rsp) + vmovdqu c, _digest+2*SZ4(%rsp) + vmovdqu d, _digest+3*SZ4(%rsp) + vmovdqu e, _digest+4*SZ4(%rsp) + vmovdqu f, _digest+5*SZ4(%rsp) + vmovdqu g, _digest+6*SZ4(%rsp) + vmovdqu h, _digest+7*SZ4(%rsp) + i = 0 +.rep 4 + vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP + VMOVPD i*32(inp0, IDX), TT2 + VMOVPD i*32(inp1, IDX), TT1 + VMOVPD i*32(inp2, IDX), TT4 + VMOVPD i*32(inp3, IDX), TT3 + TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5 + vpshufb TMP, TT0, TT0 + vpshufb TMP, TT1, TT1 + vpshufb TMP, TT2, TT2 + vpshufb TMP, TT3, TT3 + ROUND_00_15 TT0,(i*4+0) + ROUND_00_15 TT1,(i*4+1) + ROUND_00_15 TT2,(i*4+2) + ROUND_00_15 TT3,(i*4+3) + i = (i+1) +.endr + add $128, IDX + + i = (i*4) + + jmp Lrounds_16_xx +.align 16 +Lrounds_16_xx: +.rep 16 + ROUND_16_XX T1, i + i = (i+1) +.endr + cmp $0xa00,ROUND + jb Lrounds_16_xx + + # add old digest + vpaddq _digest(%rsp), a, a + vpaddq _digest+1*SZ4(%rsp), b, b + vpaddq _digest+2*SZ4(%rsp), c, c + vpaddq _digest+3*SZ4(%rsp), d, d + vpaddq _digest+4*SZ4(%rsp), e, e + vpaddq _digest+5*SZ4(%rsp), f, f + vpaddq _digest+6*SZ4(%rsp), g, g + vpaddq _digest+7*SZ4(%rsp), h, h + + sub $1, INP_SIZE # unit is blocks + jne lloop + + # write back to memory (state object) the transposed digest + vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE) + vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE) + + # 
update input data pointers + add IDX, inp0 + mov inp0, _data_ptr+0*PTR_SZ(STATE) + add IDX, inp1 + mov inp1, _data_ptr+1*PTR_SZ(STATE) + add IDX, inp2 + mov inp2, _data_ptr+2*PTR_SZ(STATE) + add IDX, inp3 + mov inp3, _data_ptr+3*PTR_SZ(STATE) + + #;;;;;;;;;;;;;;; + #; Postamble + add $STACK_SPACE1, %rsp + # restore callee-saved clobbered registers + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + + # outer calling routine restores XMM and other GP registers + ret +ENDPROC(sha512_x4_avx2) + +.data +.align 64 +K512_4: + .octa 0x428a2f98d728ae22428a2f98d728ae22,\ + 0x428a2f98d728ae22428a2f98d728ae22 + .octa 0x7137449123ef65cd7137449123ef65cd,\ + 0x7137449123ef65cd7137449123ef65cd + .octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\ + 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f + .octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\ + 0xe9b5dba58189dbbce9b5dba58189dbbc + .octa 0x3956c25bf348b5383956c25bf348b538,\ + 0x3956c25bf348b5383956c25bf348b538 + .octa 0x59f111f1b605d01959f111f1b605d019,\ + 0x59f111f1b605d01959f111f1b605d019 + .octa 0x923f82a4af194f9b923f82a4af194f9b,\ + 0x923f82a4af194f9b923f82a4af194f9b + .octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\ + 0xab1c5ed5da6d8118ab1c5ed5da6d8118 + .octa 0xd807aa98a3030242d807aa98a3030242,\ + 0xd807aa98a3030242d807aa98a3030242 + .octa 0x12835b0145706fbe12835b0145706fbe,\ + 0x12835b0145706fbe12835b0145706fbe + .octa 0x243185be4ee4b28c243185be4ee4b28c,\ + 0x243185be4ee4b28c243185be4ee4b28c + .octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\ + 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2 + .octa 0x72be5d74f27b896f72be5d74f27b896f,\ + 0x72be5d74f27b896f72be5d74f27b896f + .octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\ + 0x80deb1fe3b1696b180deb1fe3b1696b1 + .octa 0x9bdc06a725c712359bdc06a725c71235,\ + 0x9bdc06a725c712359bdc06a725c71235 + .octa 0xc19bf174cf692694c19bf174cf692694,\ + 0xc19bf174cf692694c19bf174cf692694 + .octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\ + 0xe49b69c19ef14ad2e49b69c19ef14ad2 + .octa 0xefbe4786384f25e3efbe4786384f25e3,\ + 0xefbe4786384f25e3efbe4786384f25e3 + .octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\ + 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5 + .octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\ + 0x240ca1cc77ac9c65240ca1cc77ac9c65 + .octa 0x2de92c6f592b02752de92c6f592b0275,\ + 0x2de92c6f592b02752de92c6f592b0275 + .octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\ + 0x4a7484aa6ea6e4834a7484aa6ea6e483 + .octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\ + 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4 + .octa 0x76f988da831153b576f988da831153b5,\ + 0x76f988da831153b576f988da831153b5 + .octa 0x983e5152ee66dfab983e5152ee66dfab,\ + 0x983e5152ee66dfab983e5152ee66dfab + .octa 0xa831c66d2db43210a831c66d2db43210,\ + 0xa831c66d2db43210a831c66d2db43210 + .octa 0xb00327c898fb213fb00327c898fb213f,\ + 0xb00327c898fb213fb00327c898fb213f + .octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\ + 0xbf597fc7beef0ee4bf597fc7beef0ee4 + .octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\ + 0xc6e00bf33da88fc2c6e00bf33da88fc2 + .octa 0xd5a79147930aa725d5a79147930aa725,\ + 0xd5a79147930aa725d5a79147930aa725 + .octa 0x06ca6351e003826f06ca6351e003826f,\ + 0x06ca6351e003826f06ca6351e003826f + .octa 0x142929670a0e6e70142929670a0e6e70,\ + 0x142929670a0e6e70142929670a0e6e70 + .octa 0x27b70a8546d22ffc27b70a8546d22ffc,\ + 0x27b70a8546d22ffc27b70a8546d22ffc + .octa 0x2e1b21385c26c9262e1b21385c26c926,\ + 0x2e1b21385c26c9262e1b21385c26c926 + .octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\ + 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed + .octa 0x53380d139d95b3df53380d139d95b3df,\ + 0x53380d139d95b3df53380d139d95b3df + .octa 0x650a73548baf63de650a73548baf63de,\ + 0x650a73548baf63de650a73548baf63de 
+ .octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\ + 0x766a0abb3c77b2a8766a0abb3c77b2a8 + .octa 0x81c2c92e47edaee681c2c92e47edaee6,\ + 0x81c2c92e47edaee681c2c92e47edaee6 + .octa 0x92722c851482353b92722c851482353b,\ + 0x92722c851482353b92722c851482353b + .octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\ + 0xa2bfe8a14cf10364a2bfe8a14cf10364 + .octa 0xa81a664bbc423001a81a664bbc423001,\ + 0xa81a664bbc423001a81a664bbc423001 + .octa 0xc24b8b70d0f89791c24b8b70d0f89791,\ + 0xc24b8b70d0f89791c24b8b70d0f89791 + .octa 0xc76c51a30654be30c76c51a30654be30,\ + 0xc76c51a30654be30c76c51a30654be30 + .octa 0xd192e819d6ef5218d192e819d6ef5218,\ + 0xd192e819d6ef5218d192e819d6ef5218 + .octa 0xd69906245565a910d69906245565a910,\ + 0xd69906245565a910d69906245565a910 + .octa 0xf40e35855771202af40e35855771202a,\ + 0xf40e35855771202af40e35855771202a + .octa 0x106aa07032bbd1b8106aa07032bbd1b8,\ + 0x106aa07032bbd1b8106aa07032bbd1b8 + .octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\ + 0x19a4c116b8d2d0c819a4c116b8d2d0c8 + .octa 0x1e376c085141ab531e376c085141ab53,\ + 0x1e376c085141ab531e376c085141ab53 + .octa 0x2748774cdf8eeb992748774cdf8eeb99,\ + 0x2748774cdf8eeb992748774cdf8eeb99 + .octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\ + 0x34b0bcb5e19b48a834b0bcb5e19b48a8 + .octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\ + 0x391c0cb3c5c95a63391c0cb3c5c95a63 + .octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\ + 0x4ed8aa4ae3418acb4ed8aa4ae3418acb + .octa 0x5b9cca4f7763e3735b9cca4f7763e373,\ + 0x5b9cca4f7763e3735b9cca4f7763e373 + .octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\ + 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3 + .octa 0x748f82ee5defb2fc748f82ee5defb2fc,\ + 0x748f82ee5defb2fc748f82ee5defb2fc + .octa 0x78a5636f43172f6078a5636f43172f60,\ + 0x78a5636f43172f6078a5636f43172f60 + .octa 0x84c87814a1f0ab7284c87814a1f0ab72,\ + 0x84c87814a1f0ab7284c87814a1f0ab72 + .octa 0x8cc702081a6439ec8cc702081a6439ec,\ + 0x8cc702081a6439ec8cc702081a6439ec + .octa 0x90befffa23631e2890befffa23631e28,\ + 0x90befffa23631e2890befffa23631e28 + .octa 0xa4506cebde82bde9a4506cebde82bde9,\ + 0xa4506cebde82bde9a4506cebde82bde9 + .octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\ + 0xbef9a3f7b2c67915bef9a3f7b2c67915 + .octa 0xc67178f2e372532bc67178f2e372532b,\ + 0xc67178f2e372532bc67178f2e372532b + .octa 0xca273eceea26619cca273eceea26619c,\ + 0xca273eceea26619cca273eceea26619c + .octa 0xd186b8c721c0c207d186b8c721c0c207,\ + 0xd186b8c721c0c207d186b8c721c0c207 + .octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\ + 0xeada7dd6cde0eb1eeada7dd6cde0eb1e + .octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\ + 0xf57d4f7fee6ed178f57d4f7fee6ed178 + .octa 0x06f067aa72176fba06f067aa72176fba,\ + 0x06f067aa72176fba06f067aa72176fba + .octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\ + 0x0a637dc5a2c898a60a637dc5a2c898a6 + .octa 0x113f9804bef90dae113f9804bef90dae,\ + 0x113f9804bef90dae113f9804bef90dae + .octa 0x1b710b35131c471b1b710b35131c471b,\ + 0x1b710b35131c471b1b710b35131c471b + .octa 0x28db77f523047d8428db77f523047d84,\ + 0x28db77f523047d8428db77f523047d84 + .octa 0x32caab7b40c7249332caab7b40c72493,\ + 0x32caab7b40c7249332caab7b40c72493 + .octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\ + 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc + .octa 0x431d67c49c100d4c431d67c49c100d4c,\ + 0x431d67c49c100d4c431d67c49c100d4c + .octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\ + 0x4cc5d4becb3e42b64cc5d4becb3e42b6 + .octa 0x597f299cfc657e2a597f299cfc657e2a,\ + 0x597f299cfc657e2a597f299cfc657e2a + .octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\ + 0x5fcb6fab3ad6faec5fcb6fab3ad6faec + .octa 0x6c44198c4a4758176c44198c4a475817,\ + 0x6c44198c4a4758176c44198c4a475817 + +PSHUFFLE_BYTE_FLIP_MASK: 
.octa 0x08090a0b0c0d0e0f0001020304050607 + .octa 0x18191a1b1c1d1e1f1011121314151617 diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 0b17c83d027d..2b0e2a6825f3 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -346,4 +346,10 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS_CRYPTO("sha512"); +MODULE_ALIAS_CRYPTO("sha512-ssse3"); +MODULE_ALIAS_CRYPTO("sha512-avx"); +MODULE_ALIAS_CRYPTO("sha512-avx2"); MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha384-ssse3"); +MODULE_ALIAS_CRYPTO("sha384-avx"); +MODULE_ALIAS_CRYPTO("sha384-avx2"); diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index ec138e538c44..9e1e27d31c6d 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -40,10 +40,10 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) #ifdef CONFIG_CONTEXT_TRACKING /* Called on entry from user mode with IRQs off. */ -__visible void enter_from_user_mode(void) +__visible inline void enter_from_user_mode(void) { CT_WARN_ON(ct_state() != CONTEXT_USER); - user_exit(); + user_exit_irqoff(); } #else static inline void enter_from_user_mode(void) {} @@ -274,7 +274,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) ti->status &= ~TS_COMPAT; #endif - user_enter(); + user_enter_irqoff(); } #define SYSCALL_EXIT_WORK_FLAGS \ diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 983e5d3a0d27..0b56666e6039 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1153,3 +1153,14 @@ ENTRY(async_page_fault) jmp error_code END(async_page_fault) #endif + +ENTRY(rewind_stack_do_exit) + /* Prevent any naive code from trying to unwind to our caller. */ + xorl %ebp, %ebp + + movl PER_CPU_VAR(cpu_current_top_of_stack), %esi + leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp + + call do_exit +1: jmp 1b +END(rewind_stack_do_exit) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9ee0da1807ed..b846875aeea6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1423,3 +1423,14 @@ ENTRY(ignore_sysret) mov $-ENOSYS, %eax sysret END(ignore_sysret) + +ENTRY(rewind_stack_do_exit) + /* Prevent any naive code from trying to unwind to our caller. 
*/ + xorl %ebp, %ebp + + movq PER_CPU_VAR(cpu_current_top_of_stack), %rax + leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp + + call do_exit +1: jmp 1b +END(rewind_stack_do_exit) diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 555263e385c9..e9ce9c7c39b4 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -374,5 +374,5 @@ 543 x32 io_setup compat_sys_io_setup 544 x32 io_submit compat_sys_io_submit 545 x32 execveat compat_sys_execveat/ptregs -534 x32 preadv2 compat_sys_preadv2 -535 x32 pwritev2 compat_sys_pwritev2 +546 x32 preadv2 compat_sys_preadv64v2 +547 x32 pwritev2 compat_sys_pwritev64v2 diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 027aec4a74df..627ecbcb2e62 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -33,7 +33,7 @@ .endif call \func - jmp restore + jmp .L_restore _ASM_NOKPROBE(\name) .endm @@ -54,7 +54,7 @@ #if defined(CONFIG_TRACE_IRQFLAGS) \ || defined(CONFIG_DEBUG_LOCK_ALLOC) \ || defined(CONFIG_PREEMPT) -restore: +.L_restore: popq %r11 popq %r10 popq %r9 @@ -66,5 +66,5 @@ restore: popq %rdi popq %rbp ret - _ASM_NOKPROBE(restore) + _ASM_NOKPROBE(.L_restore) #endif diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 253b72eaade6..6ba89a1ab0e5 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -55,7 +55,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) -HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi +HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi hostprogs-y += vdso2c quiet_cmd_vdso2c = VDSO2C $@ @@ -134,7 +134,7 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ targets += vdso32/vdso32.lds -targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o +targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o targets += vdso32/vclock_gettime.o KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO @@ -156,7 +156,8 @@ $(obj)/vdso32.so.dbg: FORCE \ $(obj)/vdso32/vdso32.lds \ $(obj)/vdso32/vclock_gettime.o \ $(obj)/vdso32/note.o \ - $(obj)/vdso32/system_call.o + $(obj)/vdso32/system_call.o \ + $(obj)/vdso32/sigreturn.o $(call if_changed,vdso) # diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S index d7ec4e251c0a..20633e026e82 100644 --- a/arch/x86/entry/vdso/vdso32/sigreturn.S +++ b/arch/x86/entry/vdso/vdso32/sigreturn.S @@ -1,11 +1,3 @@ -/* - * Common code for the sigreturn entry points in vDSO images. - * So far this code is the same for both int80 and sysenter versions. - * This file is #include'd by int80.S et al to define them first thing. - * The kernel assumes that the addresses of these routines are constant - * for all vDSO implementations. - */ - #include #include #include diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 0109ac6cb79c..ed4bc9731cbb 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -2,16 +2,11 @@ * AT_SYSINFO entry point */ +#include #include #include #include -/* - * First get the common code for the sigreturn entry points. - * This must come first. 
- */ -#include "sigreturn.S" - .text .globl __kernel_vsyscall .type __kernel_vsyscall,@function diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index ab220ac9b3b9..3329844e3c43 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -97,10 +98,40 @@ static int vdso_fault(const struct vm_special_mapping *sm, return 0; } -static const struct vm_special_mapping text_mapping = { - .name = "[vdso]", - .fault = vdso_fault, -}; +static void vdso_fix_landing(const struct vdso_image *image, + struct vm_area_struct *new_vma) +{ +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + if (in_ia32_syscall() && image == &vdso_image_32) { + struct pt_regs *regs = current_pt_regs(); + unsigned long vdso_land = image->sym_int80_landing_pad; + unsigned long old_land_addr = vdso_land + + (unsigned long)current->mm->context.vdso; + + /* Fixing userspace landing - look at do_fast_syscall_32 */ + if (regs->ip == old_land_addr) + regs->ip = new_vma->vm_start + vdso_land; + } +#endif +} + +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + const struct vdso_image *image = current->mm->context.vdso_image; + + if (image->size != new_size) + return -EINVAL; + + if (WARN_ON_ONCE(current->mm != new_vma->vm_mm)) + return -EFAULT; + + vdso_fix_landing(image, new_vma); + current->mm->context.vdso = (void __user *)new_vma->vm_start; + + return 0; +} static int vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) @@ -151,6 +182,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) struct vm_area_struct *vma; unsigned long addr, text_start; int ret = 0; + + static const struct vm_special_mapping vdso_mapping = { + .name = "[vdso]", + .fault = vdso_fault, + .mremap = vdso_mremap, + }; static const struct vm_special_mapping vvar_mapping = { .name = "[vvar]", .fault = vvar_fault, @@ -185,7 +222,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) image->size, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - &text_mapping); + &vdso_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 174c2549939d..75fc719b7f31 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -96,7 +96,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) { /* * XXX: if access_ok, get_user, and put_user handled - * sig_on_uaccess_error, this could go away. + * sig_on_uaccess_err, this could go away. */ if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { @@ -125,7 +125,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) struct task_struct *tsk; unsigned long caller; int vsyscall_nr, syscall_nr, tmp; - int prev_sig_on_uaccess_error; + int prev_sig_on_uaccess_err; long ret; /* @@ -221,8 +221,8 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) * With a real vsyscall, page faults cause SIGSEGV. We want to * preserve that behavior to make writing exploits harder. 
*/ - prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; - current_thread_info()->sig_on_uaccess_error = 1; + prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err; + current->thread.sig_on_uaccess_err = 1; ret = -EFAULT; switch (vsyscall_nr) { @@ -243,7 +243,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) break; } - current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; + current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err; check_fault: if (ret == -EFAULT) { diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index feb90f6730e8..72dea2f40fc4 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -655,8 +655,12 @@ fail: } if (event->attr.sample_type & PERF_SAMPLE_RAW) { - raw.size = sizeof(u32) + ibs_data.size; - raw.data = ibs_data.data; + raw = (struct perf_raw_record){ + .frag = { + .size = sizeof(u32) + ibs_data.size, + .data = ibs_data.data, + }, + }; data.raw = &raw; } diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 33787ee817f0..dfebbde2a4cc 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -263,7 +263,7 @@ static bool check_hw_exists(void) msr_fail: pr_cont("Broken PMU hardware detected, using software events only.\n"); - pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n", + printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n", boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR, reg, val_new); @@ -1622,6 +1622,29 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha } EXPORT_SYMBOL_GPL(events_sysfs_show); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_ht_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_ht_attr, attr); + + /* + * Report conditional events depending on Hyper-Threading. + * + * This is overly conservative as usually the HT special + * handling is not needed if the other CPU thread is idle. + * + * Note this does not (and cannot) handle the case when thread + * siblings are invisible, for example with virtualization + * if they are owned by some other guest. The user tool + * has to re-read when a thread sibling gets onlined later. + */ + return sprintf(page, "%s", + topology_max_smt_threads() > 1 ? 
+ pmu_attr->event_str_ht : + pmu_attr->event_str_noht); +} + EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); EVENT_ATTR(cache-references, CACHE_REFERENCES ); @@ -2319,7 +2342,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { struct stack_frame frame; - const void __user *fp; + const unsigned long __user *fp; if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ @@ -2332,7 +2355,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) return; - fp = (void __user *)regs->bp; + fp = (unsigned long __user *)regs->bp; perf_callchain_store(entry, regs->ip); @@ -2345,16 +2368,17 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs pagefault_disable(); while (entry->nr < entry->max_stack) { unsigned long bytes; + frame.next_frame = NULL; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, 16)) + if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2)) break; - bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8); + bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); if (bytes != 0) break; - bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8); + bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp)); if (bytes != 0) break; diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile index 3660b2cf245a..06c2baa51814 100644 --- a/arch/x86/events/intel/Makefile +++ b/arch/x86/events/intel/Makefile @@ -1,8 +1,8 @@ obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o -obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o -intel-rapl-objs := rapl.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o +intel-rapl-perf-objs := rapl.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7c666958a625..0974ba11e954 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -16,6 +16,7 @@ #include #include +#include #include #include "../perf_event.h" @@ -115,6 +116,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -139,6 +144,10 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -177,19 +186,27 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; -struct event_constraint intel_skl_event_constraints[] = { +static struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ + + /* + * when HT is off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc6, 0xf), /* FRONTEND_RETIRED.* */ + EVENT_CONSTRAINT_END }; static struct extra_reg intel_knl_extra_regs[] __read_mostly = { - INTEL_UEVENT_EXTRA_REG(0x01b7, - MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, - MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1), EVENT_EXTRA_END }; @@ -225,14 +242,51 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); -struct attribute *nhm_events_attrs[] = { +static struct attribute *nhm_events_attrs[] = { EVENT_PTR(mem_ld_nhm), NULL, }; -struct attribute *snb_events_attrs[] = { +/* + * topdown events for Intel Core CPUs. + * + * The events are all in slots, which is a free slot in a 4 wide + * pipeline. Some events are already reported in slots, for cycle + * events we multiply by the pipeline width (4). + * + * With Hyper Threading on, topdown metrics are either summed or averaged + * between the threads of a core: (count_t0 + count_t1). 
+ * + * For the average case the metric is always scaled to pipeline width, + * so we use factor 2 ((count_t0 + count_t1) / 2 * 4) + */ + +EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots, + "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */ + "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */ +EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2"); +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued, + "event=0xe,umask=0x1"); /* uops_issued.any */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired, + "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles, + "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, + "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */ + "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, + "4", "2"); + +static struct attribute *snb_events_attrs[] = { EVENT_PTR(mem_ld_snb), EVENT_PTR(mem_st_snb), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL, }; @@ -250,6 +304,10 @@ static struct event_constraint intel_hsw_event_constraints[] = { /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -258,12 +316,19 @@ static struct event_constraint intel_hsw_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_bdw_event_constraints[] = { +static struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ + /* + * when HT is off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ EVENT_CONSTRAINT_END }; @@ -1332,6 +1397,29 @@ static __initconst const u64 atom_hw_cache_event_ids }, }; +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c"); +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2"); +/* no_alloc_cycles.not_delivered */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm, + "event=0xca,umask=0x50"); +EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm, + "event=0xc2,umask=0x10"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm, + "event=0xc2,umask=0x10"); + +static struct attribute *slm_events_attrs[] = { + EVENT_PTR(td_total_slots_slm), + 
EVENT_PTR(td_total_slots_scale_slm), + EVENT_PTR(td_fetch_bubbles_slm), + EVENT_PTR(td_fetch_bubbles_scale_slm), + EVENT_PTR(td_slots_issued_slm), + EVENT_PTR(td_slots_retired_slm), + NULL +}; + static struct extra_reg intel_slm_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ @@ -3261,11 +3349,11 @@ static int intel_snb_pebs_broken(int cpu) u32 rev = UINT_MAX; /* default to broken for unknown models */ switch (cpu_data(cpu).x86_model) { - case 42: /* SNB */ + case INTEL_FAM6_SANDYBRIDGE: rev = 0x28; break; - case 45: /* SNB-EP */ + case INTEL_FAM6_SANDYBRIDGE_X: switch (cpu_data(cpu).x86_mask) { case 6: rev = 0x618; break; case 7: rev = 0x70c; break; @@ -3302,6 +3390,13 @@ static void intel_snb_check_microcode(void) } } +static bool is_lbr_from(unsigned long msr) +{ + unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr; + + return x86_pmu.lbr_from <= msr && msr < lbr_from_nr; +} + /* * Under certain circumstances, access certain MSR may cause #GP. * The function tests if the input MSR can be safely accessed. @@ -3322,13 +3417,24 @@ static bool check_msr(unsigned long msr, u64 mask) * Only change the bits which can be updated by wrmsrl. */ val_tmp = val_old ^ mask; + + if (is_lbr_from(msr)) + val_tmp = lbr_from_signext_quirk_wr(val_tmp); + if (wrmsrl_safe(msr, val_tmp) || rdmsrl_safe(msr, &val_new)) return false; + /* + * Quirk only affects validation in wrmsr(), so wrmsrl()'s value + * should equal rdmsrl()'s even with the quirk. + */ if (val_new != val_tmp) return false; + if (is_lbr_from(msr)) + val_old = lbr_from_signext_quirk_wr(val_old); + /* Here it's sure that the MSR can be safely accessed. * Restore the old value and return. */ @@ -3437,6 +3543,13 @@ static struct attribute *hsw_events_attrs[] = { EVENT_PTR(cycles_ct), EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_hsw), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL }; @@ -3508,15 +3621,15 @@ __init int intel_pmu_init(void) * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_model) { - case 14: /* 65nm Core "Yonah" */ + case INTEL_FAM6_CORE_YONAH: pr_cont("Core events, "); break; - case 15: /* 65nm Core2 "Merom" */ + case INTEL_FAM6_CORE2_MEROM: x86_add_quirk(intel_clovertown_quirk); - case 22: /* 65nm Core2 "Merom-L" */ - case 23: /* 45nm Core2 "Penryn" */ - case 29: /* 45nm Core2 "Dunnington (MP) */ + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3527,9 +3640,9 @@ __init int intel_pmu_init(void) pr_cont("Core2 events, "); break; - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -3557,11 +3670,11 @@ __init int intel_pmu_init(void) pr_cont("Nehalem events, "); break; - case 28: /* 45nm Atom "Pineview" */ - case 38: /* 45nm Atom "Lincroft" */ - case 39: /* 32nm Atom "Penwell" */ - case 53: /* 32nm Atom "Cloverview" */ - case 54: /* 32nm Atom "Cedarview" */ + case INTEL_FAM6_ATOM_PINEVIEW: + case INTEL_FAM6_ATOM_LINCROFT: + case INTEL_FAM6_ATOM_PENWELL: + case 
INTEL_FAM6_ATOM_CLOVERVIEW: + case INTEL_FAM6_ATOM_CEDARVIEW: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3573,9 +3686,9 @@ __init int intel_pmu_init(void) pr_cont("Atom events, "); break; - case 55: /* 22nm Atom "Silvermont" */ - case 76: /* 14nm Atom "Airmont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -3587,11 +3700,12 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.cpu_events = slm_events_attrs; pr_cont("Silvermont events, "); break; - case 92: /* 14nm Atom "Goldmont" */ - case 95: /* 14nm Atom "Goldmont Denverton" */ + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_ATOM_DENVERTON: memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, @@ -3614,9 +3728,9 @@ __init int intel_pmu_init(void) pr_cont("Goldmont events, "); break; - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -3643,8 +3757,8 @@ __init int intel_pmu_init(void) pr_cont("Westmere events, "); break; - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: x86_add_quirk(intel_sandybridge_quirk); x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, @@ -3657,7 +3771,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - if (boot_cpu_data.x86_model == 45) + if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -3679,8 +3793,8 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3696,7 +3810,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; x86_pmu.pebs_prec_dist = true; - if (boot_cpu_data.x86_model == 62) + if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -3714,10 +3828,10 @@ __init int intel_pmu_init(void) break; - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: x86_add_quirk(intel_ht_bug); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ 
-3741,10 +3855,10 @@ __init int intel_pmu_init(void) pr_cont("Haswell events, "); break; - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -3777,7 +3891,7 @@ __init int intel_pmu_init(void) pr_cont("Broadwell events, "); break; - case 87: /* Knights Landing Xeon Phi */ + case INTEL_FAM6_XEON_PHI_KNL: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, @@ -3795,16 +3909,22 @@ __init int intel_pmu_init(void) pr_cont("Knights Landing events, "); break; - case 142: /* 14nm Kabylake Mobile */ - case 158: /* 14nm Kabylake Desktop */ - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ - case 85: /* 14nm Skylake Server */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_skl(); + /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */ + event_attr_td_recovery_bubbles.event_str_noht = + "event=0xd,umask=0x1,cmask=1"; + event_attr_td_recovery_bubbles.event_str_ht = + "event=0xd,umask=0x1,cmask=1,any=1"; + x86_pmu.event_constraints = intel_skl_event_constraints; x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints; x86_pmu.extra_regs = intel_skl_extra_regs; @@ -3885,6 +4005,8 @@ __init int intel_pmu_init(void) x86_pmu.lbr_nr = 0; } + if (x86_pmu.lbr_nr) + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); /* * Access extra MSR may cause #GP under certain circumstances. * E.g. 
KVM doesn't support offcore event @@ -3917,16 +4039,14 @@ __init int intel_pmu_init(void) */ static __init int fixup_ht_bug(void) { - int cpu = smp_processor_id(); - int w, c; + int c; /* * problem not present on this CPU model, nothing to do */ if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) return 0; - w = cpumask_weight(topology_sibling_cpumask(cpu)); - if (w > 1) { + if (topology_max_smt_threads() > 1) { pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); return 0; } diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9ba4e4136a15..4c7638b91fa5 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -89,6 +89,7 @@ #include #include #include +#include #include "../perf_event.h" MODULE_LICENSE("GPL"); @@ -511,37 +512,37 @@ static const struct cstate_model slm_cstates __initconst = { { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } static const struct x86_cpu_id intel_cstates_match[] __initconst = { - X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */ - X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */ - X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */ + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates), - X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */ - X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */ - X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */ + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates), - X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */ - X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */ + X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */ - X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */ + X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */ - X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */ - X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */ + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates), - X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */ + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates), - X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */ - X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */ - X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */ + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), - X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */ - X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */ - X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */ - X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */ + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates), + 
X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates), - X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */ - X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */ + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 9e2b40cdb05f..707d358e0dff 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -77,9 +77,11 @@ static enum { LBR_IND_JMP |\ LBR_FAR) -#define LBR_FROM_FLAG_MISPRED (1ULL << 63) -#define LBR_FROM_FLAG_IN_TX (1ULL << 62) -#define LBR_FROM_FLAG_ABORT (1ULL << 61) +#define LBR_FROM_FLAG_MISPRED BIT_ULL(63) +#define LBR_FROM_FLAG_IN_TX BIT_ULL(62) +#define LBR_FROM_FLAG_ABORT BIT_ULL(61) + +#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) /* * x86control flow change classification @@ -235,6 +237,97 @@ enum { LBR_VALID, }; +/* + * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in + * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when + * TSX is not supported they have no consistent behavior: + * + * - For wrmsr(), bits 61:62 are considered part of the sign extension. + * - For HW updates (branch captures) bits 61:62 are always OFF and are not + * part of the sign extension. + * + * Therefore, if: + * + * 1) LBR has TSX format + * 2) CPU has no TSX support enabled + * + * ... then any value passed to wrmsr() must be sign extended to 63 bits and any + * value from rdmsr() must be converted to have a 61 bits sign extension, + * ignoring the TSX flags. + */ +static inline bool lbr_from_signext_quirk_needed(void) +{ + int lbr_format = x86_pmu.intel_cap.lbr_format; + bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || + boot_cpu_has(X86_FEATURE_RTM); + + return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX); +} + +DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); + +/* If quirk is enabled, ensure sign extension is 63 bits: */ +inline u64 lbr_from_signext_quirk_wr(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) { + /* + * Sign extend into bits 61:62 while preserving bit 63. + * + * Quirk is enabled when TSX is disabled. Therefore TSX bits + * in val are always OFF and must be changed to be sign + * extension bits. Since bits 59:60 are guaranteed to be + * part of the sign extension bits, we can just copy them + * to 61:62. + */ + val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2; + } + return val; +} + +/* + * If quirk is needed, ensure sign extension is 61 bits: + */ +u64 lbr_from_signext_quirk_rd(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) { + /* + * Quirk is on when TSX is not enabled. Therefore TSX + * flags must be read as OFF. 
+ */ + val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT); + } + return val; +} + +static inline void wrlbr_from(unsigned int idx, u64 val) +{ + val = lbr_from_signext_quirk_wr(val); + wrmsrl(x86_pmu.lbr_from + idx, val); +} + +static inline void wrlbr_to(unsigned int idx, u64 val) +{ + wrmsrl(x86_pmu.lbr_to + idx, val); +} + +static inline u64 rdlbr_from(unsigned int idx) +{ + u64 val; + + rdmsrl(x86_pmu.lbr_from + idx, val); + + return lbr_from_signext_quirk_rd(val); +} + +static inline u64 rdlbr_to(unsigned int idx) +{ + u64 val; + + rdmsrl(x86_pmu.lbr_to + idx, val); + + return val; +} + static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) { int i; @@ -251,8 +344,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) tos = task_ctx->tos; for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); - wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + wrlbr_from(lbr_idx, task_ctx->lbr_from[i]); + wrlbr_to (lbr_idx, task_ctx->lbr_to[i]); + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } @@ -262,9 +356,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) { - int i; unsigned lbr_idx, mask; u64 tos; + int i; if (task_ctx->lbr_callstack_users == 0) { task_ctx->lbr_stack_state = LBR_NONE; @@ -275,8 +369,8 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) tos = intel_pmu_lbr_tos(); for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); - rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + task_ctx->lbr_from[i] = rdlbr_from(lbr_idx); + task_ctx->lbr_to[i] = rdlbr_to(lbr_idx); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } @@ -452,8 +546,8 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) u16 cycles = 0; int lbr_flags = lbr_desc[lbr_format]; - rdmsrl(x86_pmu.lbr_from + lbr_idx, from); - rdmsrl(x86_pmu.lbr_to + lbr_idx, to); + from = rdlbr_from(lbr_idx); + to = rdlbr_to(lbr_idx); if (lbr_format == LBR_FORMAT_INFO && need_info) { u64 info; @@ -956,7 +1050,6 @@ void __init intel_pmu_lbr_init_core(void) * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("4-deep LBR, "); } /* nehalem/westmere */ @@ -977,7 +1070,6 @@ void __init intel_pmu_lbr_init_nhm(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("16-deep LBR, "); } /* sandy bridge */ @@ -997,7 +1089,6 @@ void __init intel_pmu_lbr_init_snb(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("16-deep LBR, "); } /* haswell */ @@ -1011,7 +1102,8 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; - pr_cont("16-deep LBR, "); + if (lbr_from_signext_quirk_needed()) + static_branch_enable(&lbr_from_quirk_key); } /* skylake */ @@ -1031,7 +1123,6 @@ __init void intel_pmu_lbr_init_skl(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("32-deep LBR, "); } /* atom */ @@ -1057,7 +1148,6 @@ void __init intel_pmu_lbr_init_atom(void) * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("8-deep LBR, "); } /* slm */ @@ -1088,6 +1178,4 @@ void intel_pmu_lbr_init_knl(void) x86_pmu.lbr_sel_mask = 
LBR_SEL_MASK; x86_pmu.lbr_sel_map = snb_lbr_sel_map; - - pr_cont("8-deep LBR, "); } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index e30eef4f29a6..d0c58b35155f 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -55,6 +55,7 @@ #include #include #include +#include #include "../perf_event.h" MODULE_LICENSE("GPL"); @@ -786,26 +787,27 @@ static const struct intel_rapl_init_fun skl_rapl_init __initconst = { }; static const struct x86_cpu_id rapl_cpu_match[] __initconst = { - X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */ - X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init), - X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */ - X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), - X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */ - X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */ - X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */ - X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */ - X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */ - X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */ - X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), - X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */ - X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), {}, }; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index fce74062d981..59b4974c697f 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1,4 +1,5 @@ #include +#include #include "uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; @@ -882,7 +883,7 @@ uncore_types_init(struct intel_uncore_type **types, bool setid) static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct intel_uncore_type *type; - struct intel_uncore_pmu *pmu; + struct intel_uncore_pmu *pmu = NULL; struct intel_uncore_box *box; int phys_id, pkg, ret; @@ -903,20 +904,37 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + /* - * for performance monitoring unit with multiple boxes, - * each box has a different function id. 
- */ - pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; - /* Knights Landing uses a common PCI device ID for multiple instances of - * an uncore PMU device type. There is only one entry per device type in - * the knl_uncore_pci_ids table inspite of multiple devices present for - * some device types. Hence PCI device idx would be 0 for all devices. - * So increment pmu pointer to point to an unused array element. + * Some platforms, e.g. Knights Landing, use a common PCI device ID + * for multiple instances of an uncore PMU device type. We should check + * PCI slot and func to indicate the uncore box. */ - if (boot_cpu_data.x86_model == 87) { - while (pmu->func_id >= 0) - pmu++; + if (id->driver_data & ~0xffff) { + struct pci_driver *pci_drv = pdev->driver; + const struct pci_device_id *ids = pci_drv->id_table; + unsigned int devfn; + + while (ids && ids->vendor) { + if ((ids->vendor == pdev->vendor) && + (ids->device == pdev->device)) { + devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), + UNCORE_PCI_DEV_FUNC(ids->driver_data)); + if (devfn == pdev->devfn) { + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; + break; + } + } + ids++; + } + if (pmu == NULL) + return -ENODEV; + } else { + /* + * for performance monitoring unit with multiple boxes, + * each box has a different function id. + */ + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; } if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) @@ -956,7 +974,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id static void uncore_pci_remove(struct pci_dev *pdev) { - struct intel_uncore_box *box = pci_get_drvdata(pdev); + struct intel_uncore_box *box; struct intel_uncore_pmu *pmu; int i, phys_id, pkg; @@ -1361,30 +1379,32 @@ static const struct intel_uncore_init_fun knl_uncore_init __initconst = { }; static const struct intel_uncore_init_fun skl_uncore_init __initconst = { + .cpu_init = skl_uncore_cpu_init, .pci_init = skl_uncore_pci_init, }; static const struct x86_cpu_id intel_uncore_match[] __initconst = { - X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */ - X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */ - X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */ - X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */ - X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */ - X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */ - X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */ - X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */ - X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */ - X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */ - X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */ - X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. 
Xeon E7 */ - X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */ - X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */ - X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */ - X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */ - X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */ - X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */ + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), {}, }; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 79766b9a3580..d6063e438158 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -15,7 +15,11 @@ #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) +#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \ + ((dev << 24) | (func << 16) | (type << 8) | idx) #define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx) +#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff) +#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff) #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff @@ -360,6 +364,7 @@ int bdw_uncore_pci_init(void); int skl_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); +void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); /* perf_event_intel_uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 96531d2b843f..97a69dbba649 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,4 +1,4 @@ -/* Nehalem/SandBridge/Haswell uncore support */ +/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */ #include "uncore.h" /* Uncore IMC PCI IDs */ @@ -9,6 +9,7 @@ #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 #define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -64,6 +65,10 @@ #define NHM_UNC_PERFEVTSEL0 0x3c0 #define NHM_UNC_UNCORE_PMC0 0x3b0 +/* 
SKL uncore global control */ +#define SKL_UNC_PERF_GLOBAL_CTL 0xe01 +#define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1) + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); @@ -179,6 +184,60 @@ void snb_uncore_cpu_init(void) snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; } +static void skl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) { + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); + } +} + +static void skl_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0); +} + +static struct intel_uncore_ops skl_uncore_msr_ops = { + .init_box = skl_uncore_msr_init_box, + .exit_box = skl_uncore_msr_exit_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type skl_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &skl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, +}; + +static struct intel_uncore_type *skl_msr_uncores[] = { + &skl_uncore_cbox, + &snb_uncore_arb, + NULL, +}; + +void skl_uncore_cpu_init(void) +{ + uncore_msr_uncores = skl_msr_uncores; + if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + snb_uncore_arb.ops = &skl_uncore_msr_ops; +} + enum { SNB_PCI_UNCORE_IMC, }; @@ -544,6 +603,11 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, }; @@ -587,6 +651,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ + IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ { /* end marker */ } }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 874e8bd64d1d..824e54086e07 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2164,21 +2164,101 @@ static struct intel_uncore_type *knl_pci_uncores[] = { */ static const struct pci_device_id knl_uncore_pci_ids[] = { - { /* MC UClk */ + { /* MC0 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0), }, - { /* MC DClk Channel */ + { /* MC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1), + }, + { /* MC0 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = 
UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0), + }, + { /* MC0 DClk CH 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1), + }, + { /* MC0 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2), + }, + { /* MC1 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3), + }, + { /* MC1 DClk CH 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4), + }, + { /* MC1 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5), + }, + { /* EDC0 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0), + }, + { /* EDC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1), + }, + { /* EDC2 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2), + }, + { /* EDC3 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3), }, - { /* EDC UClk */ + { /* EDC4 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4), + }, + { /* EDC5 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5), + }, + { /* EDC6 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6), + }, + { /* EDC7 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7), + }, + { /* EDC0 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0), + }, + { /* EDC1 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1), + }, + { /* EDC2 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2), + }, + { /* EDC3 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3), + }, + { /* EDC4 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4), + }, + { /* EDC5 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5), + }, + { /* EDC6 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6), }, - { /* EDC EClk */ + { /* EDC7 EClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7), }, { /* M2PCIe */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817), diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 85ef3c2e80e0..50b3a056f96b 100644 --- a/arch/x86/events/msr.c +++ 
b/arch/x86/events/msr.c @@ -1,4 +1,5 @@ #include +#include enum perf_msr_id { PERF_MSR_TSC = 0, @@ -34,39 +35,43 @@ static bool test_intel(int idx) return false; switch (boot_cpu_data.x86_model) { - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ - - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ - - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ - - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ - - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ - - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ - - case 55: /* 22nm Atom "Silvermont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ - case 76: /* 14nm Atom "Airmont" */ + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: + + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE2: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: + + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: + + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: + + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: + + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: + + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: if (idx == PERF_MSR_SMI) return true; break; - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8bd764df815d..8c4a47706296 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -668,6 +668,14 @@ static struct perf_pmu_events_attr event_attr_##v = { \ .event_str = str, \ }; +#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \ +static struct perf_pmu_events_ht_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\ + .id = 0, \ + .event_str_noht = noht, \ + .event_str_ht = ht, \ +} + extern struct x86_pmu x86_pmu __read_mostly; static inline bool x86_pmu_has_lbr_callstack(void) @@ -803,6 +811,8 @@ struct attribute **merge_attr(struct attribute **a, struct attribute **b); ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); #ifdef CONFIG_CPU_SUP_AMD @@ -892,6 +902,8 @@ void intel_ds_init(void); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); +u64 lbr_from_signext_quirk_wr(u64 val); + void intel_pmu_lbr_reset(void); void intel_pmu_lbr_enable(struct perf_event *event); diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index aeac434c9feb..2cfed174e3c9 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -1,5 +1,11 @@ +generated-y += syscalls_32.h +generated-y += syscalls_64.h +generated-y += 
unistd_32_ia32.h +generated-y += unistd_64_x32.h +generated-y += xen-hypercalls.h + genhdr-y += unistd_32.h genhdr-y += unistd_64.h genhdr-y += unistd_x32.h diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 94c18ebfd68c..5391b0ae7cc3 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -145,7 +145,6 @@ static inline void disable_acpi(void) { } #define ARCH_HAS_POWER_INIT 1 #ifdef CONFIG_ACPI_NUMA -extern int acpi_numa; extern int x86_acpi_numa_init(void); #endif /* CONFIG_ACPI_NUMA */ @@ -170,4 +169,6 @@ static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) } #endif +#define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT) + #endif /* _ASM_X86_ACPI_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index bc27611fa58f..f5befd4945f2 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -300,7 +300,6 @@ struct apic { unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); - unsigned long apic_id_mask; int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, const struct cpumask *andmask, diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 20370c6db74b..93eebc636c76 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -45,11 +45,11 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, : "memory", "cc"); } -static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, - u32 ecx_in, u32 *eax) +static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, + u32 ecx_in, u32 *eax) { int cx, dx, si; - u8 error; + bool error; /* * N.B. We do NOT need a cld after the BIOS call diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index 02e799fa43d1..e7cd63175de4 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -4,8 +4,8 @@ #include #ifdef CONFIG_64BIT -/* popcnt %edi, %eax -- redundant REX prefix for alignment */ -#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" +/* popcnt %edi, %eax */ +#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7" /* popcnt %rdi, %rax */ #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" #define REG_IN "D" @@ -17,19 +17,15 @@ #define REG_OUT "a" #endif -/* - * __sw_hweightXX are called from within the alternatives below - * and callee-clobbered registers need to be taken care of. See - * ARCH_HWEIGHT_CFLAGS in for the respective - * compiler switches. 
- */ +#define __HAVE_ARCH_SW_HWEIGHT + static __always_inline unsigned int __arch_hweight32(unsigned int w) { - unsigned int res = 0; + unsigned int res; asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + : "="REG_OUT (res) + : REG_IN (w)); return res; } @@ -53,11 +49,11 @@ static inline unsigned long __arch_hweight64(__u64 w) #else static __always_inline unsigned long __arch_hweight64(__u64 w) { - unsigned long res = 0; + unsigned long res; asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + : "="REG_OUT (res) + : REG_IN (w)); return res; } diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 69f1366f1aa3..5b0579abb398 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -25,8 +25,6 @@ #include #include -#include -#include #define RDRAND_RETRY_LOOPS 10 @@ -40,97 +38,91 @@ # define RDSEED_LONG RDSEED_INT #endif -#ifdef CONFIG_ARCH_RANDOM +/* Unconditional execution of RDRAND and RDSEED */ -/* Instead of arch_get_random_long() when alternatives haven't run. */ -static inline int rdrand_long(unsigned long *v) +static inline bool rdrand_long(unsigned long *v) { - int ok; - asm volatile("1: " RDRAND_LONG "\n\t" - "jc 2f\n\t" - "decl %0\n\t" - "jnz 1b\n\t" - "2:" - : "=r" (ok), "=a" (*v) - : "0" (RDRAND_RETRY_LOOPS)); - return ok; + bool ok; + unsigned int retry = RDRAND_RETRY_LOOPS; + do { + asm volatile(RDRAND_LONG "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + if (ok) + return true; + } while (--retry); + return false; +} + +static inline bool rdrand_int(unsigned int *v) +{ + bool ok; + unsigned int retry = RDRAND_RETRY_LOOPS; + do { + asm volatile(RDRAND_INT "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + if (ok) + return true; + } while (--retry); + return false; } -/* A single attempt at RDSEED */ static inline bool rdseed_long(unsigned long *v) { - unsigned char ok; + bool ok; asm volatile(RDSEED_LONG "\n\t" - "setc %0" - : "=qm" (ok), "=a" (*v)); + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); return ok; } -#define GET_RANDOM(name, type, rdrand, nop) \ -static inline int name(type *v) \ -{ \ - int ok; \ - alternative_io("movl $0, %0\n\t" \ - nop, \ - "\n1: " rdrand "\n\t" \ - "jc 2f\n\t" \ - "decl %0\n\t" \ - "jnz 1b\n\t" \ - "2:", \ - X86_FEATURE_RDRAND, \ - ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ - "0" (RDRAND_RETRY_LOOPS)); \ - return ok; \ -} - -#define GET_SEED(name, type, rdseed, nop) \ -static inline int name(type *v) \ -{ \ - unsigned char ok; \ - alternative_io("movb $0, %0\n\t" \ - nop, \ - rdseed "\n\t" \ - "setc %0", \ - X86_FEATURE_RDSEED, \ - ASM_OUTPUT2("=q" (ok), "=a" (*v))); \ - return ok; \ +static inline bool rdseed_int(unsigned int *v) +{ + bool ok; + asm volatile(RDSEED_INT "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + return ok; } -#ifdef CONFIG_X86_64 - -GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); -GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); - -GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP5); -GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); - -#else - -GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); -GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); - -GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP4); -GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); - -#endif /* CONFIG_X86_64 
*/ - +/* Conditional execution based on CPU type */ #define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) #define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) -#else +/* + * These are the generic interfaces; they must not be declared if the + * stubs in are to be invoked, + * i.e. CONFIG_ARCH_RANDOM is not defined. + */ +#ifdef CONFIG_ARCH_RANDOM -static inline int rdrand_long(unsigned long *v) +static inline bool arch_get_random_long(unsigned long *v) { - return 0; + return arch_has_random() ? rdrand_long(v) : false; } -static inline bool rdseed_long(unsigned long *v) +static inline bool arch_get_random_int(unsigned int *v) { - return 0; + return arch_has_random() ? rdrand_int(v) : false; } -#endif /* CONFIG_ARCH_RANDOM */ +static inline bool arch_get_random_seed_long(unsigned long *v) +{ + return arch_has_random_seed() ? rdseed_long(v) : false; +} + +static inline bool arch_get_random_seed_int(unsigned int *v) +{ + return arch_has_random_seed() ? rdseed_int(v) : false; +} extern void x86_init_rdrand(struct cpuinfo_x86 *c); +#else /* !CONFIG_ARCH_RANDOM */ + +static inline void x86_init_rdrand(struct cpuinfo_x86 *c) { } + +#endif /* !CONFIG_ARCH_RANDOM */ + #endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index f5063b6659eb..7acb51c49fec 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -42,6 +42,18 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +/* + * Macros to generate condition code outputs from inline assembly, + * The output operand must be type "bool". + */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +# define CC_SET(c) "\n\t/* output condition code " #c "*/\n" +# define CC_OUT(c) "=@cc" #c +#else +# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n" +# define CC_OUT(c) [_cc_ ## c] "=qm" +#endif + /* Exception table entry */ #ifdef __ASSEMBLY__ # define _ASM_EXTABLE_HANDLE(from, to, handler) \ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 3e8674288198..14635c5ea025 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -75,9 +75,9 @@ static __always_inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static __always_inline int atomic_sub_and_test(int i, atomic_t *v) +static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e); } /** @@ -112,9 +112,9 @@ static __always_inline void atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -static __always_inline int atomic_dec_and_test(atomic_t *v) +static __always_inline bool atomic_dec_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e); } /** @@ -125,9 +125,9 @@ static __always_inline int atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static __always_inline int atomic_inc_and_test(atomic_t *v) +static __always_inline bool atomic_inc_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e); } /** @@ -139,9 +139,9 @@ static __always_inline int atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. 
*/ -static __always_inline int atomic_add_negative(int i, atomic_t *v) +static __always_inline bool atomic_add_negative(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s); } /** @@ -171,6 +171,16 @@ static __always_inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) +static __always_inline int atomic_fetch_add(int i, atomic_t *v) +{ + return xadd(&v->counter, i); +} + +static __always_inline int atomic_fetch_sub(int i, atomic_t *v) +{ + return xadd(&v->counter, -i); +} + static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); @@ -190,10 +200,29 @@ static inline void atomic_##op(int i, atomic_t *v) \ : "memory"); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int old, val = atomic_read(v); \ + for (;;) { \ + old = atomic_cmpxchg(v, val, val c_op i); \ + if (old == val) \ + break; \ + val = old; \ + } \ + return old; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &) +ATOMIC_OPS(or , |) +ATOMIC_OPS(xor, ^) +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP /** diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index a984111135b1..71d7705fb303 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -320,10 +320,29 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ c = old; \ } -ATOMIC64_OP(and, &) -ATOMIC64_OP(or, |) -ATOMIC64_OP(xor, ^) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + long long old, c = 0; \ + while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ + c = old; \ + return old; \ +} + +ATOMIC64_FETCH_OP(add, +) + +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &) +ATOMIC64_OPS(or, |) +ATOMIC64_OPS(xor, ^) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 037351022f54..89ed2f6ae2f7 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -70,9 +70,9 @@ static inline void atomic64_sub(long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic64_sub_and_test(long i, atomic64_t *v) +static inline bool atomic64_sub_and_test(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e); } /** @@ -109,9 +109,9 @@ static __always_inline void atomic64_dec(atomic64_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline int atomic64_dec_and_test(atomic64_t *v) +static inline bool atomic64_dec_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e); } /** @@ -122,9 +122,9 @@ static inline int atomic64_dec_and_test(atomic64_t *v) * and returns true if the result is zero, or false for all * other cases. 
*/ -static inline int atomic64_inc_and_test(atomic64_t *v) +static inline bool atomic64_inc_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e); } /** @@ -136,9 +136,9 @@ static inline int atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int atomic64_add_negative(long i, atomic64_t *v) +static inline bool atomic64_add_negative(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s); } /** @@ -158,6 +158,16 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) return atomic64_add_return(-i, v); } +static inline long atomic64_fetch_add(long i, atomic64_t *v) +{ + return xadd(&v->counter, i); +} + +static inline long atomic64_fetch_sub(long i, atomic64_t *v) +{ + return xadd(&v->counter, -i); +} + #define atomic64_inc_return(v) (atomic64_add_return(1, (v))) #define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) @@ -180,7 +190,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new) * Atomically adds @a to @v, so long as it was not @u. * Returns the old value of @v. */ -static inline int atomic64_add_unless(atomic64_t *v, long a, long u) +static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) { long c, old; c = atomic64_read(v); @@ -229,10 +239,29 @@ static inline void atomic64_##op(long i, atomic64_t *v) \ : "memory"); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long old, val = atomic64_read(v); \ + for (;;) { \ + old = atomic64_cmpxchg(v, val, val c_op i); \ + if (old == val) \ + break; \ + val = old; \ + } \ + return old; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &) +ATOMIC64_OPS(or, |) +ATOMIC64_OPS(xor, ^) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #endif /* _ASM_X86_ATOMIC64_64_H */ diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h index 2b00c776f223..4b7b8e71607e 100644 --- a/arch/x86/include/asm/bios_ebda.h +++ b/arch/x86/include/asm/bios_ebda.h @@ -17,7 +17,7 @@ static inline unsigned int get_bios_ebda(void) return address; /* 0 means none */ } -void reserve_ebda_region(void); +void reserve_bios_regions(void); #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION /* diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 7766d1cf096e..68557f52b961 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -201,9 +201,9 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c); } /** @@ -213,7 +213,7 @@ static __always_inline int test_and_set_bit(long nr, volatile unsigned long *add * * This is the same as test_and_set_bit on x86. 
*/ -static __always_inline int +static __always_inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); @@ -228,13 +228,13 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) { - int oldbit; + bool oldbit; asm("bts %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -247,9 +247,9 @@ static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *a * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c); } /** @@ -268,25 +268,25 @@ static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *a * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ -static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) { - int oldbit; + bool oldbit; asm volatile("btr %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); return oldbit; } /* WARNING: non atomic and it can be reordered! */ -static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) { - int oldbit; + bool oldbit; asm volatile("btc %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr) : "memory"); return oldbit; @@ -300,24 +300,24 @@ static __always_inline int __test_and_change_bit(long nr, volatile unsigned long * This operation is atomic and cannot be reordered. * It also implies a memory barrier. 
*/ -static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c); } -static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr) +static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } -static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr) +static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { - int oldbit; + bool oldbit; asm volatile("bt %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit) + CC_SET(c) + : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; @@ -329,7 +329,7 @@ static __always_inline int variable_test_bit(long nr, volatile const unsigned lo * @nr: bit number to test * @addr: Address to start counting from */ -static int test_bit(int nr, const volatile unsigned long *addr); +static bool test_bit(int nr, const volatile unsigned long *addr); #endif #define test_bit(nr, addr) \ diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index 532f85e6651f..7b53743ed267 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -2,8 +2,7 @@ #define _ASM_X86_CHECKSUM_32_H #include <linux/in6.h> - -#include <asm/uaccess.h> +#include <linux/uaccess.h> /* * computes the checksum of a memory block at buff, length len, diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 5a3b2c119ed0..a18806165fe4 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -40,6 +40,7 @@ typedef s32 compat_long_t; typedef s64 __attribute__((aligned(4))) compat_s64; typedef u32 compat_uint_t; typedef u32 compat_ulong_t; +typedef u32 compat_u32; typedef u64 __attribute__((aligned(4))) compat_u64; typedef u32 compat_uptr_t; @@ -181,6 +182,16 @@ typedef struct compat_siginfo { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ struct { unsigned int _addr; /* faulting insn/memory ref. */ + short int _addr_lsb; /* Valid LSB of the reported address. 
*/ + union { + /* used when si_code=SEGV_BNDERR */ + struct { + compat_uptr_t _lower; + compat_uptr_t _upper; + } _addr_bnd; + /* used when si_code=SEGV_PKUERR */ + compat_u32 _pkey; + }; } _sigfault; /* SIGPOLL */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 678637ad7476..9b7fa6313f1a 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -16,8 +16,8 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} #define cpu_physical_id(cpu) boot_cpu_physical_apicid +#define cpu_acpi_id(cpu) 0 #define safe_smp_processor_id() 0 -#define stack_smp_processor_id() 0 #endif /* CONFIG_SMP */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4a413485f9eb..c64b1e9c5d1a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -301,10 +301,6 @@ #define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ #define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ -#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */ - - #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional @@ -312,5 +308,7 @@ */ #define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 78d1e7467eae..d0bb76d81402 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -41,10 +41,9 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); /* * Wrap all the virtual calls in a way that forces the parameters on the stack. */ -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ - ((efi_##f##_t __attribute__((regparm(0)))*) \ - efi.systab->runtime->f)(args); \ + ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \ }) #define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) @@ -81,8 +80,8 @@ struct efi_scratch { } \ }) -#define arch_efi_call_virt(f, args...) \ - efi_call((void *)efi.systab->runtime->f, args) \ +#define arch_efi_call_virt(p, f, args...) 
\ + efi_call((void *)p->f, args) \ #define arch_efi_call_virt_teardown() \ ({ \ @@ -125,7 +124,6 @@ extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); -extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_update_mappings(void); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 31ac8e6d9f36..116b58347501 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -18,6 +18,7 @@ #include <asm/user.h> #include <asm/fpu/api.h> #include <asm/fpu/xstate.h> +#include <asm/trace/fpu.h> /* * High level FPU state handling functions: @@ -524,6 +525,7 @@ static inline void __fpregs_deactivate(struct fpu *fpu) fpu->fpregs_active = 0; this_cpu_write(fpu_fpregs_owner_ctx, NULL); + trace_x86_fpu_regs_deactivated(fpu); } /* Must be paired with a 'clts' (fpregs_activate_hw()) before! */ @@ -533,6 +535,7 @@ static inline void __fpregs_activate(struct fpu *fpu) fpu->fpregs_active = 1; this_cpu_write(fpu_fpregs_owner_ctx, fpu); + trace_x86_fpu_regs_activated(fpu); } /* @@ -604,11 +607,13 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) /* But leave fpu_fpregs_owner_ctx! */ old_fpu->fpregs_active = 0; + trace_x86_fpu_regs_deactivated(old_fpu); /* Don't change CR0.TS if we just switch! */ if (fpu.preload) { new_fpu->counter++; __fpregs_activate(new_fpu); + trace_x86_fpu_regs_activated(new_fpu); prefetch(&new_fpu->state); } else { __fpregs_deactivate_hw(); diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 36b90bbfc69f..48df486b02f9 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -122,6 +122,7 @@ enum xfeature { #define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK) #define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256) #define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM) +#define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR) #define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU) #define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) @@ -230,6 +231,12 @@ struct xstate_header { u64 reserved[6]; } __attribute__((packed)); +/* + * xstate_header.xcomp_bv[63] indicates that the extended_state_area + * is in compacted format. + */ +#define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63) + /* * This is our most modern FPU state format, as saved by the XSAVE * and restored by the XRSTOR instructions. 
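[The XCOMP_BV_COMPACTED_FORMAT constant added above encodes bit 63 of xstate_header.xcomp_bv. As a rough sketch, not part of this patch and using a hypothetical helper name and an abbreviated stand-in for the kernel structure, a consumer holding an xstate header could test for the compacted XSAVES layout like this:

#include <stdbool.h>
#include <stdint.h>

/* Bit 63 of xcomp_bv flags the compacted extended-state layout. */
#define XCOMP_BV_COMPACTED_FORMAT ((uint64_t)1 << 63)

/* Abbreviated stand-in for the kernel's struct xstate_header. */
struct xstate_header {
	uint64_t xfeatures;	/* state components present in the buffer */
	uint64_t xcomp_bv;	/* compaction bitmap; bit 63 = compacted */
	uint64_t reserved[6];
};

/* True when the extended state area uses the compacted format. */
static bool xstate_is_compacted(const struct xstate_header *hdr)
{
	return (hdr->xcomp_bv & XCOMP_BV_COMPACTED_FORMAT) != 0;
}

The kernel's own check lives behind using_compacted_format(), declared in the fpu/xstate.h hunk below.]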
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 38951b0fcc5a..ae55a43e09c0 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -18,6 +18,9 @@ #define XSAVE_YMM_SIZE 256 #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) +/* Supervisor features */ +#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT) + /* Supported features which support lazy state saving */ #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ XFEATURE_MASK_SSE | \ @@ -39,7 +42,6 @@ #define REX_PREFIX #endif -extern unsigned int xstate_size; extern u64 xfeatures_mask; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; @@ -48,5 +50,9 @@ extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); - +int using_compacted_format(void); +int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, + void __user *ubuf, struct xregs_state *xsave); +int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, + struct xregs_state *xsave); #endif diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 74a2e312e8a2..02aff0867211 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -48,6 +48,7 @@ /* AVX VEX prefixes */ #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ +#define INAT_PFX_EVEX 15 /* EVEX prefix */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 @@ -89,6 +90,7 @@ #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -141,7 +143,13 @@ static inline int inat_last_prefix_id(insn_attr_t attr) static inline int inat_is_vex_prefix(insn_attr_t attr) { attr &= INAT_PFX_MASK; - return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || + attr == INAT_PFX_EVEX; +} + +static inline int inat_is_evex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; } static inline int inat_is_vex3_prefix(insn_attr_t attr) @@ -216,6 +224,11 @@ static inline int inat_accept_vex(insn_attr_t attr) static inline int inat_must_vex(insn_attr_t attr) { - return attr & INAT_VEXONLY; + return attr & (INAT_VEXONLY | INAT_EVEXONLY); +} + +static inline int inat_must_evex(insn_attr_t attr) +{ + return attr & INAT_EVEXONLY; } #endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index e7814b74caf8..b3e32b010ab1 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -91,6 +91,7 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ +#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ @@ -133,6 +134,13 @@ static inline int insn_is_avx(struct insn *insn) return (insn->vex_prefix.value != 0); } +static inline int insn_is_evex(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); 
+ return (insn->vex_prefix.nbytes == 4); +} + /* Ensure this instruction is decoded completely */ static inline int insn_complete(struct insn *insn) { @@ -144,8 +152,10 @@ static inline insn_byte_t insn_vex_m_bits(struct insn *insn) { if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ return X86_VEX2_M; - else + else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ return X86_VEX3_M(insn->vex_prefix.bytes[1]); + else /* EVEX */ + return X86_EVEX_M(insn->vex_prefix.bytes[1]); } static inline insn_byte_t insn_vex_p_bits(struct insn *insn) diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 7c5af123bdbd..9d6b097aa73d 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -12,9 +12,17 @@ #define _ASM_X86_INTEL_MID_H #include <linux/sfi.h> +#include <linux/pci.h> #include <linux/platform_device.h> extern int intel_mid_pci_init(void); +extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); + +#define INTEL_MID_PWR_LSS_OFFSET 4 +#define INTEL_MID_PWR_LSS_TYPE (1 << 7) + +extern int intel_mid_pwr_get_lss_id(struct pci_dev *pdev); + extern int get_gpio_by_name(const char *name); extern void intel_scu_device_register(struct platform_device *pdev); extern int __init sfi_parse_mrtc(struct sfi_table_header *table); @@ -34,13 +42,28 @@ struct devs_id { void *(*get_platform_data)(void *info); /* Custom handler for devices */ void (*device_handler)(struct sfi_device_table_entry *pentry, - struct devs_id *dev); + struct devs_id *dev); }; -#define sfi_device(i) \ - static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ +#define sfi_device(i) \ + static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ __attribute__((__section__(".x86_intel_mid_dev.init"))) = &i +/** +* struct mid_sd_board_info - template for SD device creation +* @name: identifies the driver +* @bus_num: board-specific identifier for a given SD controller +* @max_clk: the maximum frequency device supports +* @platform_data: the particular data stored there is driver-specific +*/ +struct mid_sd_board_info { + char name[SFI_NAME_LEN]; + int bus_num; + unsigned short addr; + u32 max_clk; + void *platform_data; +}; + /* * Medfield is the follow-up of Moorestown, it combines two chip solution into * one. Other than that it also added always-on and constant tsc and lapic @@ -60,7 +83,7 @@ extern enum intel_mid_cpu_type __intel_mid_cpu_chip; /** * struct intel_mid_ops - Interface between intel-mid & sub archs * @arch_setup: arch_setup function to re-initialize platform - * structures (x86_init, x86_platform_init) + * structures (x86_init, x86_platform_init) * * This structure can be extended if any new interface is required * between intel-mid & its sub arch files. @@ -70,20 +93,20 @@ struct intel_mid_ops { }; /* Helper API's for INTEL_MID_OPS_INIT */ -#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \ - [cpuid] = get_##cpuname##_ops +#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \ + [cpuid] = get_##cpuname##_ops /* Maximum number of CPU ops */ -#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *)) +#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *)) /* * For every new cpu addition, a weak get_<cpuname>_ops() function needs be * declared in arch/x86/platform/intel_mid/intel_mid_weak_decls.h. 
*/ -#define INTEL_MID_OPS_INIT {\ - DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \ - DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \ - DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \ +#define INTEL_MID_OPS_INIT { \ + DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \ + DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \ + DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \ }; #ifdef CONFIG_X86_INTEL_MID @@ -100,8 +123,8 @@ static inline bool intel_mid_has_msic(void) #else /* !CONFIG_X86_INTEL_MID */ -#define intel_mid_identify_cpu() (0) -#define intel_mid_has_msic() (0) +#define intel_mid_identify_cpu() 0 +#define intel_mid_has_msic() 0 #endif /* !CONFIG_X86_INTEL_MID */ @@ -117,35 +140,38 @@ extern enum intel_mid_timer_options intel_mid_timer_options; * Penwell uses spread spectrum clock, so the freq number is not exactly * the same as reported by MSR based on SDM. */ -#define FSB_FREQ_83SKU 83200 -#define FSB_FREQ_100SKU 99840 -#define FSB_FREQ_133SKU 133000 +#define FSB_FREQ_83SKU 83200 +#define FSB_FREQ_100SKU 99840 +#define FSB_FREQ_133SKU 133000 -#define FSB_FREQ_167SKU 167000 -#define FSB_FREQ_200SKU 200000 -#define FSB_FREQ_267SKU 267000 -#define FSB_FREQ_333SKU 333000 -#define FSB_FREQ_400SKU 400000 +#define FSB_FREQ_167SKU 167000 +#define FSB_FREQ_200SKU 200000 +#define FSB_FREQ_267SKU 267000 +#define FSB_FREQ_333SKU 333000 +#define FSB_FREQ_400SKU 400000 /* Bus Select SoC Fuse value */ -#define BSEL_SOC_FUSE_MASK 0x7 -#define BSEL_SOC_FUSE_001 0x1 /* FSB 133MHz */ -#define BSEL_SOC_FUSE_101 0x5 /* FSB 100MHz */ -#define BSEL_SOC_FUSE_111 0x7 /* FSB 83MHz */ +#define BSEL_SOC_FUSE_MASK 0x7 +/* FSB 133MHz */ +#define BSEL_SOC_FUSE_001 0x1 +/* FSB 100MHz */ +#define BSEL_SOC_FUSE_101 0x5 +/* FSB 83MHz */ +#define BSEL_SOC_FUSE_111 0x7 -#define SFI_MTMR_MAX_NUM 8 -#define SFI_MRTC_MAX 8 +#define SFI_MTMR_MAX_NUM 8 +#define SFI_MRTC_MAX 8 extern void intel_scu_devices_create(void); extern void intel_scu_devices_destroy(void); /* VRTC timer */ -#define MRST_VRTC_MAP_SZ (1024) -/*#define MRST_VRTC_PGOFFSET (0xc00) */ +#define MRST_VRTC_MAP_SZ 1024 +/* #define MRST_VRTC_PGOFFSET 0xc00 */ extern void intel_mid_rtc_init(void); -/* the offset for the mapping of global gpio pin to irq */ -#define INTEL_MID_IRQ_OFFSET 0x100 +/* The offset for the mapping of global gpio pin to irq */ +#define INTEL_MID_IRQ_OFFSET 0x100 #endif /* _ASM_X86_INTEL_MID_H */ diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h new file mode 100644 index 000000000000..2674ee3de748 --- /dev/null +++ b/arch/x86/include/asm/kaslr.h @@ -0,0 +1,15 @@ +#ifndef _ASM_KASLR_H_ +#define _ASM_KASLR_H_ + +unsigned long kaslr_get_random_long(const char *purpose); + +#ifdef CONFIG_RANDOMIZE_MEMORY +extern unsigned long page_offset_base; +extern unsigned long vmalloc_base; + +void kernel_randomize_memory(void); +#else +static inline void kernel_randomize_memory(void) { } +#endif /* CONFIG_RANDOMIZE_MEMORY */ + +#endif diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index e5f5dc9787d5..1ef9d581b5d9 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_trace(struct task_struct *t, struct pt_regs *regs, unsigned long *sp, unsigned long bp); +extern void show_stack_regs(struct pt_regs *regs); 
extern void __show_regs(struct pt_regs *regs, int all); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4421b5da409d..d1d1e5094c28 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -38,12 +38,11 @@ typedef u8 kprobe_opcode_t; #define RELATIVECALL_OPCODE 0xe8 #define RELATIVE_ADDR_SIZE 4 #define MAX_STACK_SIZE 64 -#define MIN_STACK_SIZE(ADDR) \ - (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) \ - ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) +#define CUR_STACK_SIZE(ADDR) \ + (current_top_of_stack() - (unsigned long)(ADDR)) +#define MIN_STACK_SIZE(ADDR) \ + (MAX_STACK_SIZE < CUR_STACK_SIZE(ADDR) ? \ + MAX_STACK_SIZE : CUR_STACK_SIZE(ADDR)) #define flush_insn_slot(p) do { } while (0) diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 4ad6560847b1..7511978093eb 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -50,9 +50,9 @@ static inline void local_sub(long i, local_t *l) * true if the result is zero, or false for all * other cases. */ -static inline int local_sub_and_test(long i, local_t *l) +static inline bool local_sub_and_test(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e); } /** @@ -63,9 +63,9 @@ static inline int local_sub_and_test(long i, local_t *l) * returns true if the result is 0, or false for all other * cases. */ -static inline int local_dec_and_test(local_t *l) +static inline bool local_dec_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e"); + GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e); } /** @@ -76,9 +76,9 @@ static inline int local_dec_and_test(local_t *l) * and returns true if the result is zero, or false for all * other cases. */ -static inline int local_inc_and_test(local_t *l) +static inline bool local_inc_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e"); + GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e); } /** @@ -90,9 +90,9 @@ static inline int local_inc_and_test(local_t *l) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int local_add_negative(long i, local_t *l) +static inline bool local_add_negative(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5a73a9c62c39..56f4c6676b29 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -64,8 +64,6 @@ #define MSR_OFFCORE_RSP_0 0x000001a6 #define MSR_OFFCORE_RSP_1 0x000001a7 -#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad -#define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae #define MSR_TURBO_RATIO_LIMIT 0x000001ad #define MSR_TURBO_RATIO_LIMIT1 0x000001ae #define MSR_TURBO_RATIO_LIMIT2 0x000001af diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h index 85e6cda45a02..e9355a84fc67 100644 --- a/arch/x86/include/asm/mutex_32.h +++ b/arch/x86/include/asm/mutex_32.h @@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { /* cmpxchg because it never induces a false contention state. 
*/ - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; return 0; diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index 07537a44216e..d9850758464e 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -118,10 +118,10 @@ do { \ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; - else - return 0; + + return 0; } #endif /* _ASM_X86_MUTEX_64_H */ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d5c2f8b40faa..9215e0527647 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,6 +1,10 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H +#ifndef __ASSEMBLY__ +#include <asm/kaslr.h> +#endif + #ifdef CONFIG_KASAN #define KASAN_STACK_ORDER 1 #else @@ -32,7 +36,12 @@ * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's * what Xen requires. */ -#define __PAGE_OFFSET _AC(0xffff880000000000, UL) +#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) +#ifdef CONFIG_RANDOMIZE_MEMORY +#define __PAGE_OFFSET page_offset_base +#else +#define __PAGE_OFFSET __PAGE_OFFSET_BASE +#endif /* CONFIG_RANDOMIZE_MEMORY */ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e0ba66ca68c6..e02e3f80d363 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -510,14 +510,15 @@ do { \ /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ - int old__; \ - asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (var) \ + bool old__; \ + asm volatile("btr %2,"__percpu_arg(1)"\n\t" \ + CC_SET(c) \ + : CC_OUT(c) (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) -static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, +static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, const unsigned long __percpu *addr) { unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; @@ -529,14 +530,14 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, #endif } -static inline int x86_this_cpu_variable_test_bit(int nr, +static inline bool x86_this_cpu_variable_test_bit(int nr, const unsigned long __percpu *addr) { - int oldbit; + bool oldbit; asm volatile("bt "__percpu_arg(2)",%1\n\t" - "sbb %0,%0" - : "=r" (oldbit) + CC_SET(c) + : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index bf7f8b55b0f9..b6d425999f99 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -81,7 +81,11 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { struct page *page; - page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO; + + if (mm == &init_mm) + gfp &= ~__GFP_ACCOUNT; + page = alloc_pages(gfp, 0); if (!page) return NULL; if (!pgtable_pmd_page_ctor(page)) { @@ -125,7 +129,11 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t 
*pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); + gfp_t gfp = GFP_KERNEL_ACCOUNT; + + if (mm == &init_mm) + gfp &= ~__GFP_ACCOUNT; + return (pud_t *)get_zeroed_page(gfp); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1a27396b6ea0..437feb436efa 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -480,7 +480,7 @@ pte_t *populate_extra_pte(unsigned long vaddr); static inline int pte_none(pte_t pte) { - return !pte.pte; + return !(pte.pte & ~(_PAGE_KNL_ERRATUM_MASK)); } #define __HAVE_ARCH_PTE_SAME @@ -552,7 +552,8 @@ static inline int pmd_none(pmd_t pmd) { /* Only check low word on 32-bit platforms, since it might be out of sync with upper half. */ - return (unsigned long)native_pmd_val(pmd) == 0; + unsigned long val = native_pmd_val(pmd); + return (val & ~_PAGE_KNL_ERRATUM_MASK) == 0; } static inline unsigned long pmd_page_vaddr(pmd_t pmd) @@ -616,7 +617,7 @@ static inline unsigned long pages_to_mb(unsigned long npg) #if CONFIG_PGTABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { - return native_pud_val(pud) == 0; + return (native_pud_val(pud) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; } static inline int pud_present(pud_t pud) @@ -694,6 +695,12 @@ static inline int pgd_bad(pgd_t pgd) static inline int pgd_none(pgd_t pgd) { + /* + * There is no need to do a workaround for the KNL stray + * A/D bit erratum here. PGDs only point to page tables + * except on 32-bit non-PAE which is not supported on + * KNL. + */ return !native_pgd_val(pgd); } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ @@ -729,6 +736,23 @@ extern int direct_gbpages; void init_mem_mapping(void); void early_alloc_pgt_buf(void); +#ifdef CONFIG_X86_64 +/* Realmode trampoline initialization. */ +extern pgd_t trampoline_pgd_entry; +static inline void __meminit init_trampoline_default(void) +{ + /* Default trampoline pgd value */ + trampoline_pgd_entry = init_level4_pgt[pgd_index(__PAGE_OFFSET)]; +} +# ifdef CONFIG_RANDOMIZE_MEMORY +void __meminit init_trampoline(void); +# else +# define init_trampoline init_trampoline_default +# endif +#else +static inline void init_trampoline(void) { } +#endif + /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) { diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 2ee781114d34..7e8ec7ae10fa 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -140,18 +140,32 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_unmap(pte) ((void)(pte))/* NOP */ -/* Encode and de-code a swap entry */ +/* + * Encode and de-code a swap entry + * + * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number + * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names + * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry + * + * G (8) is aliased and used as a PROT_NONE indicator for + * !present ptes. We need to start storing swap entries above + * there. We also need to avoid using A and D because of an + * erratum where they can be incorrectly set by hardware on + * non-present PTEs. 
+ */ +#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) #define SWP_TYPE_BITS 5 -#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) +/* Place the offset above the type: */ +#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) -#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ +#define __swp_type(x) (((x).val >> (SWP_TYPE_FIRST_BIT)) \ & ((1U << SWP_TYPE_BITS) - 1)) -#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) +#define __swp_offset(x) ((x).val >> SWP_OFFSET_FIRST_BIT) #define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << (_PAGE_BIT_PRESENT + 1)) \ - | ((offset) << SWP_OFFSET_SHIFT) }) + ((type) << (SWP_TYPE_FIRST_BIT)) \ + | ((offset) << SWP_OFFSET_FIRST_BIT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index e6844dfb4471..6fdef9eef2d5 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -5,6 +5,7 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <asm/kaslr.h> /* * These are used to make use of C type-checking.. @@ -53,10 +54,16 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) -#define VMALLOC_START _AC(0xffffc90000000000, UL) -#define VMALLOC_END _AC(0xffffe8ffffffffff, UL) -#define VMEMMAP_START _AC(0xffffea0000000000, UL) +#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define VMALLOC_SIZE_TB _AC(32, UL) +#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) +#define VMEMMAP_START _AC(0xffffea0000000000, UL) +#ifdef CONFIG_RANDOMIZE_MEMORY +#define VMALLOC_START vmalloc_base +#else +#define VMALLOC_START __VMALLOC_BASE +#endif /* CONFIG_RANDOMIZE_MEMORY */ +#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 7b5efe264eff..f1218f512f62 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -70,6 +70,12 @@ _PAGE_PKEY_BIT2 | \ _PAGE_PKEY_BIT3) +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +#define _PAGE_KNL_ERRATUM_MASK (_PAGE_DIRTY | _PAGE_ACCESSED) +#else +#define _PAGE_KNL_ERRATUM_MASK 0 +#endif + #ifdef CONFIG_KMEMCHECK #define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) #else @@ -475,8 +481,6 @@ extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags); -void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, - unsigned numpages); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index d397deb58146..17f218645701 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -81,7 +81,7 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - GEN_UNARY_RMWcc("decl", __preempt_count, 
__percpu_arg(0), "e"); + GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); } /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 62c6cc3cc5d3..63def9537a2d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -367,10 +367,15 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ -extern unsigned int xstate_size; +extern unsigned int fpu_kernel_xstate_size; +extern unsigned int fpu_user_xstate_size; struct perf_event; +typedef struct { + unsigned long seg; +} mm_segment_t; + struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -419,6 +424,11 @@ struct thread_struct { /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; + mm_segment_t addr_limit; + + unsigned int sig_on_uaccess_err:1; + unsigned int uaccess_err:1; /* uaccess failed */ + /* Floating point and extended processor state */ struct fpu fpu; /* @@ -490,11 +500,6 @@ static inline void load_sp0(struct tss_struct *tss, #define set_iopl_mask native_set_iopl_mask #endif /* CONFIG_PARAVIRT */ -typedef struct { - unsigned long seg; -} mm_segment_t; - - /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); @@ -716,6 +721,7 @@ static inline void spin_lock_prefetch(const void *x) .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ + .addr_limit = KERNEL_DS, \ } extern unsigned long thread_saved_pc(struct task_struct *tsk); @@ -765,8 +771,9 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX TASK_SIZE_MAX -#define INIT_THREAD { \ - .sp0 = TOP_OF_INIT_STACK \ +#define INIT_THREAD { \ + .sp0 = TOP_OF_INIT_STACK, \ + .addr_limit = KERNEL_DS, \ } /* diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index fdcc04020636..7c1c89598688 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -68,30 +68,23 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) return product; } -static __always_inline -u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) -{ - u64 delta = rdtsc_ordered() - src->tsc_timestamp; - return pvclock_scale_delta(delta, src->tsc_to_system_mul, - src->tsc_shift); -} - static __always_inline unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, cycle_t *cycles, u8 *flags) { unsigned version; - cycle_t ret, offset; - u8 ret_flags; + cycle_t offset; + u64 delta; version = src->version; + /* Make the latest version visible */ + smp_rmb(); - offset = pvclock_get_nsec_offset(src); - ret = src->system_time + offset; - ret_flags = src->flags; - - *cycles = ret; - *flags = ret_flags; + delta = rdtsc_ordered() - src->tsc_timestamp; + offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, + src->tsc_shift); + *cycles = src->system_time + offset; + *flags = src->flags; return version; } diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 8f7866a5b9a4..661dd305694a 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -1,11 +1,13 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -#ifdef CC_HAVE_ASM_GOTO +#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) + +/* Use asm goto */ #define __GEN_RMWcc(fullop, var, cc, ...) 
\ do { \ - asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ @@ -19,15 +21,17 @@ cc_label: \ #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val)) -#else /* !CC_HAVE_ASM_GOTO */ +#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ + +/* Use flags output or a set instruction */ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - char c; \ - asm volatile (fullop "; set" cc " %1" \ - : "+m" (var), "=qm" (c) \ + bool c; \ + asm volatile (fullop ";" CC_SET(cc) \ + : "+m" (var), CC_OUT(cc) (c) \ : __VA_ARGS__ : "memory"); \ - return c != 0; \ + return c; \ } while (0) #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ @@ -36,6 +40,6 @@ do { \ #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val)) -#endif /* CC_HAVE_ASM_GOTO */ +#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ #endif /* _ASM_X86_RMWcc */ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 453744c1d347..8dbc762ad132 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -77,7 +77,7 @@ static inline void __down_read(struct rw_semaphore *sem) /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline bool __down_read_trylock(struct rw_semaphore *sem) { long result, tmp; asm volatile("# beginning __down_read_trylock\n\t" @@ -93,7 +93,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) : "+m" (sem->count), "=&a" (result), "=&r" (tmp) : "i" (RWSEM_ACTIVE_READ_BIAS) : "memory", "cc"); - return result >= 0 ? 1 : 0; + return result >= 0; } /* @@ -134,9 +134,10 @@ static inline int __down_write_killable(struct rw_semaphore *sem) /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline bool __down_write_trylock(struct rw_semaphore *sem) { - long result, tmp; + bool result; + long tmp0, tmp1; asm volatile("# beginning __down_write_trylock\n\t" " mov %0,%1\n\t" "1:\n\t" @@ -144,14 +145,14 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* was the active mask 0 before? 
*/ " jnz 2f\n\t" " mov %1,%2\n\t" - " add %3,%2\n\t" + " add %4,%2\n\t" LOCK_PREFIX " cmpxchg %2,%0\n\t" " jnz 1b\n\t" "2:\n\t" - " sete %b1\n\t" - " movzbl %b1, %k1\n\t" + CC_SET(e) "# ending __down_write_trylock\n\t" - : "+m" (sem->count), "=&a" (result), "=&r" (tmp) + : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1), + CC_OUT(e) (result) : "er" (RWSEM_ACTIVE_WRITE_BIAS) : "memory", "cc"); return result; @@ -213,23 +214,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) : "memory", "cc"); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" - : "+m" (sem->count) - : "er" (delta)); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - return delta + xadd(&sem->count, delta); -} - #endif /* __KERNEL__ */ #endif /* _ASM_X86_RWSEM_H */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 2138c9ae19ee..dd1e7d6387ab 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -81,9 +81,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig) static inline int __gen_sigismember(sigset_t *set, int _sig) { - int ret; - asm("btl %2,%1\n\tsbbl %0,%0" - : "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); + unsigned char ret; + asm("btl %2,%1\n\tsetc %0" + : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); return ret; } diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 66b057306f40..ebd0c164cd4e 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -33,6 +33,7 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) } DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); @@ -135,6 +136,7 @@ int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_disable(void); int common_cpu_die(unsigned int cpu); void native_cpu_die(unsigned int cpu); +void hlt_play_dead(void); void native_play_dead(void); void play_dead_common(void); void wbinvd_on_cpu(int cpu); @@ -147,6 +149,7 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); void smp_store_boot_cpu_info(void); void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) +#define cpu_acpi_id(cpu) per_cpu(x86_cpu_to_acpiid, cpu) #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() @@ -172,12 +175,6 @@ extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) #define raw_smp_processor_id() (this_cpu_read(cpu_number)) -#define stack_smp_processor_id() \ -({ \ - struct thread_info *ti; \ - __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ - ti->cpu; \ -}) #define safe_smp_processor_id() smp_processor_id() #endif diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 7c247e7404be..0944218af9e2 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -14,7 +14,7 @@ extern int kstack_depth_to_print; struct thread_info; struct stacktrace_ops; -typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, +typedef unsigned long (*walk_stack_t)(struct task_struct *task, unsigned long *stack, unsigned long bp, const 
struct stacktrace_ops *ops, @@ -23,13 +23,13 @@ typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, int *graph); extern unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); extern unsigned long -print_context_stack_bp(struct thread_info *tinfo, +print_context_stack_bp(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h index f28a24b51dc7..cbf8847d02a0 100644 --- a/arch/x86/include/asm/sync_bitops.h +++ b/arch/x86/include/asm/sync_bitops.h @@ -79,10 +79,10 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; bts %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; bts %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } @@ -97,10 +97,10 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; btr %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; btr %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } @@ -115,10 +115,10 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; btc %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; btc %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 30c133ac05cd..89bff044a6f5 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -57,9 +57,6 @@ struct thread_info { __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - mm_segment_t addr_limit; - unsigned int sig_on_uaccess_error:1; - unsigned int uaccess_err:1; /* uaccess failed */ }; #define INIT_THREAD_INFO(tsk) \ @@ -67,7 +64,6 @@ struct thread_info { .task = &tsk, \ .flags = 0, \ .cpu = 0, \ - .addr_limit = KERNEL_DS, \ } #define init_thread_info (init_thread_union.thread_info) @@ -186,11 +182,6 @@ static inline unsigned long current_stack_pointer(void) # define cpu_current_top_of_stack (cpu_tss + TSS_sp0) #endif -/* Load thread_info address into "reg" */ -#define GET_THREAD_INFO(reg) \ - _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ - _ASM_SUB $(THREAD_SIZE),reg ; - /* * ASM operand which evaluates to a 'thread_info' address of * the current task, if it is known that "reg" is exactly "off" diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 7f991bd5031b..43e87a3dd95c 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -25,16 +25,6 @@ #ifndef _ASM_X86_TOPOLOGY_H #define _ASM_X86_TOPOLOGY_H -#ifdef CONFIG_X86_32 -# ifdef CONFIG_SMP -# define ENABLE_TOPO_DEFINES -# endif -#else -# ifdef CONFIG_SMP -# 
define ENABLE_TOPO_DEFINES -# endif -#endif - /* * to preserve the visibility of NUMA_NO_NODE definition, * moved to there from here. May be used independent of @@ -123,12 +113,20 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) -#ifdef ENABLE_TOPO_DEFINES +#ifdef CONFIG_SMP #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) + +extern int __max_smt_threads; + +static inline int topology_max_smt_threads(void) +{ + return __max_smt_threads; +} + int topology_update_package_map(unsigned int apicid, unsigned int cpu); extern int topology_phys_to_logical_pkg(unsigned int pkg); #else @@ -136,6 +134,7 @@ extern int topology_phys_to_logical_pkg(unsigned int pkg); static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } +static inline int topology_max_smt_threads(void) { return 1; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h new file mode 100644 index 000000000000..9217ab1f5bf6 --- /dev/null +++ b/arch/x86/include/asm/trace/fpu.h @@ -0,0 +1,119 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM x86_fpu + +#if !defined(_TRACE_FPU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FPU_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(x86_fpu, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu), + + TP_STRUCT__entry( + __field(struct fpu *, fpu) + __field(bool, fpregs_active) + __field(bool, fpstate_active) + __field(int, counter) + __field(u64, xfeatures) + __field(u64, xcomp_bv) + ), + + TP_fast_assign( + __entry->fpu = fpu; + __entry->fpregs_active = fpu->fpregs_active; + __entry->fpstate_active = fpu->fpstate_active; + __entry->counter = fpu->counter; + if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { + __entry->xfeatures = fpu->state.xsave.header.xfeatures; + __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; + } + ), + TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx", + __entry->fpu, + __entry->fpregs_active, + __entry->fpstate_active, + __entry->counter, + __entry->xfeatures, + __entry->xcomp_bv + ) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_before_save, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_after_save, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_before_restore, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_after_restore, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_activate_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_init_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_dropped, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) 
+); + +DEFINE_EVENT(x86_fpu, x86_fpu_copy_src, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_xstate_check_failed, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH asm/trace/ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE fpu +#endif /* _TRACE_FPU_H */ + +/* This part must be outside protection */ +#include diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 7428697c5b8d..33b6365c22fe 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -35,7 +35,7 @@ extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); -extern int check_tsc_disabled(void); +extern unsigned long native_calibrate_cpu(void); extern unsigned long native_calibrate_tsc(void); extern unsigned long long native_sched_clock_from_tsc(u64 tsc); @@ -52,7 +52,6 @@ extern int notsc_setup(char *); extern void tsc_save_sched_clock_state(void); extern void tsc_restore_sched_clock_state(void); -/* MSR based TSC calibration for Intel Atom SoC platforms */ -unsigned long try_msr_calibrate_tsc(void); +unsigned long cpu_khz_from_msr(void); #endif /* _ASM_X86_TSC_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 2982387ba817..c03bfb68c503 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -29,12 +29,12 @@ #define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) #define get_ds() (KERNEL_DS) -#define get_fs() (current_thread_info()->addr_limit) -#define set_fs(x) (current_thread_info()->addr_limit = (x)) +#define get_fs() (current->thread.addr_limit) +#define set_fs(x) (current->thread.addr_limit = (x)) #define segment_eq(a, b) ((a).seg == (b).seg) -#define user_addr_max() (current_thread_info()->addr_limit.seg) +#define user_addr_max() (current->thread.addr_limit.seg) #define __addr_ok(addr) \ ((unsigned long __force)(addr) < user_addr_max()) @@ -342,7 +342,26 @@ do { \ } while (0) #ifdef CONFIG_X86_32 -#define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() +#define __get_user_asm_u64(x, ptr, retval, errret) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + asm volatile(ASM_STAC "\n" \ + "1: movl %2,%%eax\n" \ + "2: movl %3,%%edx\n" \ + "3: " ASM_CLAC "\n" \ + ".section .fixup,\"ax\"\n" \ + "4: mov %4,%0\n" \ + " xorl %%eax,%%eax\n" \ + " xorl %%edx,%%edx\n" \ + " jmp 3b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ + : "=r" (retval), "=A"(x) \ + : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \ + "i" (errret), "0" (retval)); \ +}) + #define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() #else #define __get_user_asm_u64(x, ptr, retval, errret) \ @@ -429,7 +448,7 @@ do { \ #define __get_user_nocheck(x, ptr, size) \ ({ \ int __gu_err; \ - unsigned long __gu_val; \ + __inttype(*(ptr)) __gu_val; \ __uaccess_begin(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ __uaccess_end(); \ @@ -468,13 +487,13 @@ struct __large_struct { unsigned long buf[100]; }; * uaccess_try and catch */ #define uaccess_try do { \ - current_thread_info()->uaccess_err = 0; \ + current->thread.uaccess_err = 0; \ __uaccess_begin(); \ barrier(); #define uaccess_catch(err) \ __uaccess_end(); \ - (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ + (err) |= (current->thread.uaccess_err ? 
-EFAULT : 0); \ } while (0) /** diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 2b19caa4081c..32712a925f26 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -26,6 +26,8 @@ # define __ARCH_WANT_COMPAT_SYS_GETDENTS64 # define __ARCH_WANT_COMPAT_SYS_PREADV64 # define __ARCH_WANT_COMPAT_SYS_PWRITEV64 +# define __ARCH_WANT_COMPAT_SYS_PREADV64V2 +# define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 # endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 4dcdf74dfed8..6ba793178441 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -168,20 +168,22 @@ struct x86_legacy_devices { * struct x86_legacy_features - legacy x86 features * * @rtc: this device has a CMOS real-time clock present - * @ebda_search: it's safe to search for the EBDA signature in the hardware's - * low RAM + * @reserve_bios_regions: boot code will search for the EBDA address and the + * start of the 640k - 1M BIOS region. If false, the platform must + * ensure that its memory map correctly reserves sub-1MB regions as needed. * @devices: legacy x86 devices, refer to struct x86_legacy_devices * documentation for further details. */ struct x86_legacy_features { int rtc; - int ebda_search; + int reserve_bios_regions; struct x86_legacy_devices devices; }; /** * struct x86_platform_ops - platform specific runtime functions - * @calibrate_tsc: calibrate TSC + * @calibrate_cpu: calibrate CPU + * @calibrate_tsc: calibrate TSC, if different from CPU * @get_wallclock: get time from HW clock like RTC etc. * @set_wallclock: set time back to HW clock * @is_untracked_pat_range exclude from PAT logic @@ -200,6 +202,7 @@ struct x86_legacy_features { * semantics. */ struct x86_platform_ops { + unsigned long (*calibrate_cpu)(void); unsigned long (*calibrate_tsc)(void); void (*get_wallclock)(struct timespec *ts); int (*set_wallclock)(const struct timespec *ts); diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h index 0d809e9fc975..3bdd10d71223 100644 --- a/arch/x86/include/asm/xen/cpuid.h +++ b/arch/x86/include/asm/xen/cpuid.h @@ -76,15 +76,18 @@ /* * Leaf 5 (0x40000x04) * HVM-specific features + * EAX: Features + * EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag) */ -/* EAX Features */ /* Virtualized APIC registers */ #define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized x2APIC accesses */ #define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Memory mapped from other domains has valid IOMMU entries */ #define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) +/* vcpu id is present in EBX */ +#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) #define XEN_CPUID_MAX_NUM_LEAVES 4 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 9414f84584e4..6738e5c82cca 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -161,13 +161,15 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) /** * acpi_register_lapic - register a local apic and generates a logic cpu number * @id: local apic id to register + * @acpiid: ACPI id to register * @enabled: this cpu is enabled or not * * Returns the logic cpu number which maps to the local apic */ -static int acpi_register_lapic(int id, u8 enabled) +static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) { unsigned int ver = 0; + int cpu; if (id >= MAX_LOCAL_APIC) { printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); @@ -182,7 +184,11 @@ static int acpi_register_lapic(int id, u8 enabled) 
if (boot_cpu_physical_apicid != -1U) ver = apic_version[boot_cpu_physical_apicid]; - return generic_processor_info(id, ver); + cpu = generic_processor_info(id, ver); + if (cpu >= 0) + early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; + + return cpu; } static int __init @@ -212,7 +218,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) if (!apic->apic_id_valid(apic_id) && enabled) printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); else - acpi_register_lapic(apic_id, enabled); + acpi_register_lapic(apic_id, processor->uid, enabled); #else printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); #endif @@ -240,6 +246,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) * when we use CPU hotplug. */ acpi_register_lapic(processor->id, /* APIC ID */ + processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); return 0; @@ -258,6 +265,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) acpi_table_print_madt_entry(header); acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ + processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); return 0; @@ -714,7 +722,7 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) { int cpu; - cpu = acpi_register_lapic(physid, ACPI_MADT_ENABLED); + cpu = acpi_register_lapic(physid, U32_MAX, ACPI_MADT_ENABLED); if (cpu < 0) { pr_info(PREFIX "Unable to map lapic to logical cpu number\n"); return cpu; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index a147e676fc7b..e991d5c8bb3a 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -71,8 +71,8 @@ int amd_cache_northbridges(void) while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL) i++; - if (i == 0) - return 0; + if (!i) + return -ENODEV; nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); if (!nb) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 60078a67d7e3..ac8d8ad8b009 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -92,8 +92,10 @@ static int apic_extnmi = APIC_EXTNMI_BSP; */ DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); #ifdef CONFIG_X86_32 @@ -2045,7 +2047,7 @@ int generic_processor_info(int apicid, int version) int thiscpu = max + disabled_cpus - 1; pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i almost" + "APIC: NR_CPUS/possible_cpus limit of %i almost" " reached. Keeping one slot for boot cpu." " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); @@ -2057,7 +2059,7 @@ int generic_processor_info(int apicid, int version) int thiscpu = max + disabled_cpus; pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i reached." + "APIC: NR_CPUS/possible_cpus limit of %i reached." " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); disabled_cpus++; @@ -2085,7 +2087,7 @@ int generic_processor_info(int apicid, int version) if (topology_update_package_map(apicid, cpu) < 0) { int thiscpu = max + disabled_cpus; - pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n", + pr_warning("APIC: Package limit reached. 
Processor %d/0x%x ignored.\n", thiscpu, apicid); disabled_cpus++; return -ENOSPC; diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 76f89e2b245a..048747778d37 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -181,7 +181,6 @@ static struct apic apic_flat = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu << 24, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, @@ -278,7 +277,6 @@ static struct apic apic_physflat = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 13d19ed58514..2cebf59092d8 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -141,7 +141,6 @@ struct apic apic_noop = { .get_apic_id = noop_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index ab5c2c685a3c..714d4fda0d52 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -269,7 +269,6 @@ static const struct apic apic_numachip1 __refconst = { .get_apic_id = numachip1_get_apic_id, .set_apic_id = numachip1_set_apic_id, - .apic_id_mask = 0xffU << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, @@ -321,7 +320,6 @@ static const struct apic apic_numachip2 __refconst = { .get_apic_id = numachip2_get_apic_id, .set_apic_id = numachip2_set_apic_id, - .apic_id_mask = 0xffU << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index cf9bd896c12d..06dbaa458bfe 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -171,7 +171,6 @@ static struct apic apic_bigsmp = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 446702ed99dc..f072b9572634 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -981,7 +981,7 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, return __irq_domain_alloc_irqs(domain, irq, 1, ioapic_alloc_attr_node(info), - info, legacy); + info, legacy, NULL); } /* @@ -1014,7 +1014,8 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain, info->ioapic_pin)) return -ENOMEM; } else { - irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true); + irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, + NULL); if (irq >= 0) { irq_data = irq_domain_get_irq_data(domain, irq); data = irq_data->chip_data; @@ -2567,29 +2568,25 @@ static struct resource * __init ioapic_setup_resources(void) unsigned long n; struct resource *res; char *mem; - int i, num = 0; + int i; - for_each_ioapic(i) - num++; - if (num == 0) + if (nr_ioapics == 0) return NULL; n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= num; + n *= nr_ioapics; mem = alloc_bootmem(n); res = (void *)mem; - mem += sizeof(struct resource) * num; + mem += sizeof(struct resource) * nr_ioapics; - num = 0; for_each_ioapic(i) { - res[num].name = mem; - res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res[i].name = mem; + 
res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; - ioapics[i].iomem_res = &res[num]; - num++; + ioapics[i].iomem_res = &res[i]; } ioapic_resources = res; diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index f316e34abb42..93edfa01b408 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -101,7 +101,6 @@ static struct apic apic_default = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index aca8b75c1552..24170d0809ba 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -270,7 +270,6 @@ static struct apic apic_x2apic_cluster = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a1242e2c12e6..4f13f54f1b1f 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -126,7 +126,6 @@ static struct apic apic_x2apic_phys = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 29003154fafd..64dd38fbf218 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -582,7 +582,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, @@ -919,7 +918,7 @@ static void uv_heartbeat(unsigned long ignored) uv_set_scir_bits(bits); /* enable next timer period */ - mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL); + mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); } static void uv_heartbeat_enable(int cpu) @@ -928,7 +927,7 @@ static void uv_heartbeat_enable(int cpu) struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer; uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); - setup_timer(timer, uv_heartbeat, cpu); + setup_pinned_timer(timer, uv_heartbeat, cpu); timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; add_timer_on(timer, cpu); uv_cpu_scir_info(cpu)->enabled = 1; diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 674134e9f5e5..2bd5c6ff7ee7 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -31,7 +31,9 @@ void common(void) { BLANK(); OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); - OFFSET(TI_addr_limit, thread_info, addr_limit); + + BLANK(); + OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0fe6953f421c..d22a7b9c4f0e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1452,7 +1452,7 @@ void cpu_init(void) struct task_struct *me; struct tss_struct *t; unsigned long v; - int cpu = stack_smp_processor_id(); + int cpu = raw_smp_processor_id(); int i; wait_for_master_cpu(cpu); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c 
index 6e2ffbebbcdb..c1a89bc026ac 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -300,15 +300,14 @@ static void intel_workarounds(struct cpuinfo_x86 *c) } /* - * P4 Xeon errata 037 workaround. + * P4 Xeon erratum 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { if (msr_set_bit(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) - > 0) { + MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { pr_info("CPU: C0 stepping P4 Xeon detected.\n"); - pr_info("CPU: Disabling hardware prefetching (Errata 037)\n"); + pr_info("CPU: Disabling hardware prefetching (Erratum 037)\n"); } } diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 34c89a3e8260..83f1a98d37db 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -46,7 +46,7 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) return; mce_setup(&m); - m.bank = 1; + m.bank = -1; /* Fake a memory read error with unknown channel */ m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 92e5e37d97bf..79d8ec849468 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -425,7 +425,7 @@ static u64 mce_rdmsrl(u32 msr) } if (rdmsrl_safe(msr, &v)) { - WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr); + WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr); /* * Return zero in case the access faulted. This should * not happen normally but can happen if the CPU does @@ -1309,7 +1309,7 @@ static void __restart_timer(struct timer_list *t, unsigned long interval) if (timer_pending(t)) { if (time_before(when, t->expires)) - mod_timer_pinned(t, when); + mod_timer(t, when); } else { t->expires = round_jiffies(when); add_timer_on(t, smp_processor_id()); @@ -1735,7 +1735,7 @@ static void __mcheck_cpu_init_timer(void) struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned int cpu = smp_processor_id(); - setup_timer(t, mce_timer_fn, cpu); + setup_pinned_timer(t, mce_timer_fn, cpu); mce_start_timer(cpu, t); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 10b0661651e0..7b7f3be783d4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -93,7 +93,7 @@ const char * const amd_df_mcablock_names[] = { EXPORT_SYMBOL_GPL(amd_df_mcablock_names); static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); -static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ +static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index f6f50c4ceaec..cfa97ff67bda 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -39,9 +39,9 @@ __setup("nordrand", x86_rdrand_setup); */ #define SANITY_CHECK_LOOPS 8 +#ifdef CONFIG_ARCH_RANDOM void x86_init_rdrand(struct cpuinfo_x86 *c) { -#ifdef CONFIG_ARCH_RANDOM unsigned long tmp; int i; @@ -55,5 +55,5 @@ void x86_init_rdrand(struct cpuinfo_x86 *c) return; } } -#endif } +#endif diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2bb25c3fe2e8..92e8f0a7159c 100644 --- a/arch/x86/kernel/dumpstack.c +++ 
b/arch/x86/kernel/dumpstack.c @@ -42,16 +42,14 @@ void printk_address(unsigned long address) static void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { - struct task_struct *task; unsigned long ret_addr; int index; if (addr != (unsigned long)return_to_handler) return; - task = tinfo->task; index = task->curr_ret_stack; if (!task->ret_stack || index < *graph) @@ -68,7 +66,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data, static inline void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { } #endif @@ -79,28 +77,36 @@ print_ftrace_graph_addr(unsigned long addr, void *data, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -static inline int valid_stack_ptr(struct thread_info *tinfo, +static inline int valid_stack_ptr(struct task_struct *task, void *p, unsigned int size, void *end) { - void *t = tinfo; + void *t = task_stack_page(task); if (end) { if (p < end && p >= (end-THREAD_SIZE)) return 1; else return 0; } - return p > t && p < t + THREAD_SIZE - size; + return p >= t && p < t + THREAD_SIZE - size; } unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) { struct stack_frame *frame = (struct stack_frame *)bp; - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { + /* + * If we overflowed the stack into a guard page, jump back to the + * bottom of the usable stack. + */ + if ((unsigned long)task_stack_page(task) - (unsigned long)stack < + PAGE_SIZE) + stack = (unsigned long *)task_stack_page(task); + + while (valid_stack_ptr(task, stack, sizeof(*stack), end)) { unsigned long addr; addr = *stack; @@ -112,7 +118,7 @@ print_context_stack(struct thread_info *tinfo, } else { ops->address(data, addr, 0); } - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } stack++; } @@ -121,7 +127,7 @@ print_context_stack(struct thread_info *tinfo, EXPORT_SYMBOL_GPL(print_context_stack); unsigned long -print_context_stack_bp(struct thread_info *tinfo, +print_context_stack_bp(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) @@ -129,7 +135,7 @@ print_context_stack_bp(struct thread_info *tinfo, struct stack_frame *frame = (struct stack_frame *)bp; unsigned long *ret_addr = &frame->return_address; - while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { + while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) { unsigned long addr = *ret_addr; if (!__kernel_text_address(addr)) @@ -139,7 +145,7 @@ print_context_stack_bp(struct thread_info *tinfo, break; frame = frame->next_frame; ret_addr = &frame->return_address; - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } return (unsigned long)frame; @@ -199,6 +205,11 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_stack_log_lvl(task, NULL, sp, bp, ""); } +void show_stack_regs(struct pt_regs *regs) +{ + show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, ""); +} + static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; 
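Two details in the dumpstack.c hunk above are easy to miss: valid_stack_ptr() now accepts the very first word of the stack (p >= t rather than p > t, since the base of task_stack_page() is a valid frame address), and a stack pointer that overflowed into the guard page below the stack is clamped back to the stack base before the walk starts. A minimal user-space sketch of both checks, where STACK_SIZE, GUARD_SIZE and stack_base are illustrative stand-ins for the kernel's THREAD_SIZE, PAGE_SIZE and task_stack_page(task):

/*
 * Hedged sketch, not kernel code: STACK_SIZE, GUARD_SIZE and stack_base
 * stand in for THREAD_SIZE, PAGE_SIZE and task_stack_page(task).
 */
#include <stddef.h>

#define STACK_SIZE 16384	/* stand-in for THREAD_SIZE */
#define GUARD_SIZE 4096		/* stand-in for PAGE_SIZE   */

static int valid_stack_ptr(const char *stack_base, const void *p, size_t size)
{
	const char *c = p;

	/* The object must lie fully inside [stack_base, stack_base + STACK_SIZE). */
	return c >= stack_base && c < stack_base + STACK_SIZE - size;
}

static const char *clamp_overflowed_ptr(const char *stack_base, const char *p)
{
	/*
	 * As in print_context_stack(): the unsigned difference is small only
	 * when p sits in the guard page just below the stack, in which case
	 * the walk restarts from the bottom of the usable stack.
	 */
	if ((size_t)(stack_base - p) < GUARD_SIZE)
		return stack_base;
	return p;
}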
static int die_owner = -1; static unsigned int die_nest_count; @@ -228,6 +239,8 @@ unsigned long oops_begin(void) EXPORT_SYMBOL_GPL(oops_begin); NOKPROBE_SYMBOL(oops_begin); +void __noreturn rewind_stack_do_exit(int signr); + void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { if (regs && kexec_should_crash(current)) @@ -249,7 +262,13 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - do_exit(signr); + + /* + * We're not going to return, but we might be on an IST stack or + * have very little stack space left. Rewind the stack and kill + * the task. + */ + rewind_stack_do_exit(signr); } NOKPROBE_SYMBOL(oops_end); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 464ffd69b92e..948d77da3881 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -61,15 +61,13 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, bp = stack_frame(task, regs); for (;;) { - struct thread_info *context; void *end_stack; end_stack = is_hardirq_stack(stack, cpu); if (!end_stack) end_stack = is_softirq_stack(stack, cpu); - context = task_thread_info(task); - bp = ops->walk_stack(context, stack, bp, ops, data, + bp = ops->walk_stack(task, stack, bp, ops, data, end_stack, &graph); /* Stop if not on irq stack */ @@ -98,7 +96,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, int i; if (sp == NULL) { - if (task) + if (regs) + sp = (unsigned long *)regs->sp; + else if (task) sp = (unsigned long *)task->thread.sp; else sp = (unsigned long *)&sp; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 5f1c6266eb30..6dede08dd98b 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -153,7 +153,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - struct thread_info *tinfo; unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned long dummy; unsigned used = 0; @@ -179,7 +178,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * current stack address. 
If the stacks consist of nested * exceptions */ - tinfo = task_thread_info(task); while (!done) { unsigned long *stack_end; enum stack_type stype; @@ -202,7 +200,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, id) < 0) break; - bp = ops->walk_stack(tinfo, stack, bp, ops, + bp = ops->walk_stack(task, stack, bp, ops, data, stack_end, &graph); ops->stack(data, ""); /* @@ -218,7 +216,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, "IRQ") < 0) break; - bp = ops->walk_stack(tinfo, stack, bp, + bp = ops->walk_stack(task, stack, bp, ops, data, stack_end, &graph); /* * We link to the next stack (which would be @@ -240,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, /* * This handles the process stack: */ - bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); + bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph); put_cpu(); } EXPORT_SYMBOL(dump_trace); @@ -266,7 +264,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, * back trace for this cpu: */ if (sp == NULL) { - if (task) + if (regs) + sp = (unsigned long *)regs->sp; + else if (task) sp = (unsigned long *)task->thread.sp; else sp = (unsigned long *)&sp; @@ -274,6 +274,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, stack = sp; for (i = 0; i < kstack_depth_to_print; i++) { + unsigned long word; + if (stack >= irq_stack && stack <= irq_stack_end) { if (stack == irq_stack_end) { stack = (unsigned long *) (irq_stack_end[-1]); @@ -283,12 +285,18 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, if (kstack_end(stack)) break; } + + if (probe_kernel_address(stack, word)) + break; + if ((i % STACKSLOTS_PER_LINE) == 0) { if (i != 0) pr_cont("\n"); - printk("%s %016lx", log_lvl, *stack++); + printk("%s %016lx", log_lvl, word); } else - pr_cont(" %016lx", *stack++); + pr_cont(" %016lx", word); + + stack++; touch_nmi_watchdog(); } preempt_enable(); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index bca14c899137..57b71373bae3 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -11,7 +11,11 @@ #include #include +#include +#include #include +#include +#include #include #include #include @@ -21,6 +25,9 @@ #include #include #include +#include + +#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg) static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -75,6 +82,13 @@ static void __init nvidia_bugs(int num, int slot, int func) { #ifdef CONFIG_ACPI #ifdef CONFIG_X86_IO_APIC + /* + * Only applies to Nvidia root ports (bus 0) and not to + * Nvidia graphics cards with PCI ports on secondary buses. + */ + if (num) + return; + /* * All timer overrides on Nvidia are * wrong unless HPET is enabled. 
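The 64-bit stack dumper above no longer dereferences the saved stack pointer blindly: each word is fetched through probe_kernel_address() and the dump stops at the first word that faults, which is what makes dumping a partially unmapped or overflowed stack safe. A condensed sketch of that loop shape, where safe_read() is a hypothetical stand-in for probe_kernel_address():

#include <stdio.h>
#include <string.h>

/*
 * Illustrative stand-in for probe_kernel_address(): the real helper returns
 * non-zero when the access faults, via the kernel's exception tables. This
 * user-space version cannot recover from a fault; it only models the
 * calling convention.
 */
static int safe_read(const unsigned long *src, unsigned long *dst)
{
	memcpy(dst, src, sizeof(*dst));
	return 0;
}

static void dump_words(const unsigned long *stack, int depth)
{
	int i;

	for (i = 0; i < depth; i++) {
		unsigned long word;

		if (safe_read(stack, &word))
			break;		/* stop at the first faulting word */

		printf(" %016lx", word);
		stack++;
	}
	printf("\n");
}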
@@ -590,6 +604,61 @@ static void __init force_disable_hpet(int num, int slot, int func) #endif } +#define BCM4331_MMIO_SIZE 16384 +#define BCM4331_PM_CAP 0x40 +#define bcma_aread32(reg) ioread32(mmio + 1 * BCMA_CORE_SIZE + reg) +#define bcma_awrite32(reg, val) iowrite32(val, mmio + 1 * BCMA_CORE_SIZE + reg) + +static void __init apple_airport_reset(int bus, int slot, int func) +{ + void __iomem *mmio; + u16 pmcsr; + u64 addr; + int i; + + if (!dmi_match(DMI_SYS_VENDOR, "Apple Inc.")) + return; + + /* Card may have been put into PCI_D3hot by grub quirk */ + pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); + + if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { + pmcsr &= ~PCI_PM_CTRL_STATE_MASK; + write_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL, pmcsr); + mdelay(10); + + pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); + if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { + dev_err("Cannot power up Apple AirPort card\n"); + return; + } + } + + addr = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); + addr |= (u64)read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_1) << 32; + addr &= PCI_BASE_ADDRESS_MEM_MASK; + + mmio = early_ioremap(addr, BCM4331_MMIO_SIZE); + if (!mmio) { + dev_err("Cannot iomap Apple AirPort card\n"); + return; + } + + pr_info("Resetting Apple AirPort card (left enabled by EFI)\n"); + + for (i = 0; bcma_aread32(BCMA_RESET_ST) && i < 30; i++) + udelay(10); + + bcma_awrite32(BCMA_RESET_CTL, BCMA_RESET_CTL_RESET); + bcma_aread32(BCMA_RESET_CTL); + udelay(1); + + bcma_awrite32(BCMA_RESET_CTL, 0); + bcma_aread32(BCMA_RESET_CTL); + udelay(10); + + early_iounmap(mmio, BCM4331_MMIO_SIZE); +} #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 @@ -603,12 +672,6 @@ struct chipset { void (*f)(int num, int slot, int func); }; -/* - * Only works for devices on the root bus. If you add any devices - * not on bus 0 readd another loop level in early_quirks(). But - * be careful because at least the Nvidia quirk here relies on - * only matching on bus 0. - */ static struct chipset early_qrk[] __initdata = { { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, @@ -638,9 +701,13 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_BROADCOM, 0x4331, + PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} }; +static void __init early_pci_scan_bus(int bus); + /** * check_dev_quirk - apply early quirks to a given PCI device * @num: bus number @@ -649,7 +716,7 @@ static struct chipset early_qrk[] __initdata = { * * Check the vendor & device ID against the early quirks table. * - * If the device is single function, let early_quirks() know so we don't + * If the device is single function, let early_pci_scan_bus() know so we don't * poke at this device again. 
*/ static int __init check_dev_quirk(int num, int slot, int func) @@ -658,6 +725,7 @@ static int __init check_dev_quirk(int num, int slot, int func) u16 vendor; u16 device; u8 type; + u8 sec; int i; class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); @@ -685,25 +753,36 @@ static int __init check_dev_quirk(int num, int slot, int func) type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); + + if ((type & 0x7f) == PCI_HEADER_TYPE_BRIDGE) { + sec = read_pci_config_byte(num, slot, func, PCI_SECONDARY_BUS); + if (sec > num) + early_pci_scan_bus(sec); + } + if (!(type & 0x80)) return -1; return 0; } -void __init early_quirks(void) +static void __init early_pci_scan_bus(int bus) { int slot, func; - if (!early_pci_allowed()) - return; - /* Poor man's PCI discovery */ - /* Only scan the root bus */ for (slot = 0; slot < 32; slot++) for (func = 0; func < 8; func++) { /* Only probe function 0 on single fn devices */ - if (check_dev_quirk(0, slot, func)) + if (check_dev_quirk(bus, slot, func)) break; } } + +void __init early_quirks(void) +{ + if (!early_pci_allowed()) + return; + + early_pci_scan_bus(0); +} diff --git a/arch/x86/kernel/ebda.c b/arch/x86/kernel/ebda.c index afe65dffee80..4312f8ae71b7 100644 --- a/arch/x86/kernel/ebda.c +++ b/arch/x86/kernel/ebda.c @@ -6,66 +6,92 @@ #include /* + * This function reserves all conventional PC system BIOS related + * firmware memory areas (some of which are data, some of which + * are code), that must not be used by the kernel as available + * RAM. + * * The BIOS places the EBDA/XBDA at the top of conventional * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. This also contains a - * workaround for Dell systems that neglect to reserve EBDA. - * The same workaround also avoids a problem with the AMD768MPX - * chipset: reserve a page before VGA to prevent PCI prefetch - * into it (errata #56). Usually the page is reserved anyways, - * unless you have no PS/2 mouse plugged in. + * conventional memory (int 0x12) too. + * + * This means that as a first approximation on most systems we can + * guess the reserved BIOS area by looking at the low BIOS RAM size + * value and assume that everything above that value (up to 1MB) is + * reserved. + * + * But life in firmware country is not that simple: + * + * - This code also contains a quirk for Dell systems that neglect + * to reserve the EBDA area in the 'RAM size' value ... + * + * - The same quirk also avoids a problem with the AMD768MPX + * chipset: reserve a page before VGA to prevent PCI prefetch + * into it (errata #56). (Usually the page is reserved anyways, + * unless you have no PS/2 mouse plugged in.) + * + * - Plus paravirt systems don't have a reliable value in the + * 'BIOS RAM size' pointer we can rely on, so we must quirk + * them too. + * + * Due to those various problems this function is deliberately + * very conservative and tries to err on the side of reserving + * too much, to not risk reserving too little. + * + * Losing a small amount of memory in the bottom megabyte is + * rarely a problem, as long as we have enough memory to install + * the SMP bootup trampoline which *must* be in this area. * - * This functions is deliberately very conservative. Losing - * memory in the bottom megabyte is rarely a problem, as long - * as we have enough memory to install the trampoline. Using - * memory that is in use by the BIOS or by some DMA device - * the BIOS didn't shut down *is* a big problem. 
+ * Using memory that is in use by the BIOS or by some DMA device + * the BIOS didn't shut down *is* a big problem to the kernel, + * obviously. */ -#define BIOS_LOWMEM_KILOBYTES 0x413 -#define LOWMEM_CAP 0x9f000U /* Absolute maximum */ -#define INSANE_CUTOFF 0x20000U /* Less than this = insane */ +#define BIOS_RAM_SIZE_KB_PTR 0x413 -void __init reserve_ebda_region(void) +#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ +#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ + +void __init reserve_bios_regions(void) { - unsigned int lowmem, ebda_addr; + unsigned int bios_start, ebda_start; /* - * To determine the position of the EBDA and the - * end of conventional memory, we need to look at - * the BIOS data area. In a paravirtual environment - * that area is absent. We'll just have to assume - * that the paravirt case can handle memory setup - * correctly, without our help. + * NOTE: In a paravirtual environment the BIOS reserved + * area is absent. We'll just have to assume that the + * paravirt case can handle memory setup correctly, + * without our help. */ - if (!x86_platform.legacy.ebda_search) + if (!x86_platform.legacy.reserve_bios_regions) return; - /* end of low (conventional) memory */ - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); - lowmem <<= 10; - - /* start of EBDA area */ - ebda_addr = get_bios_ebda(); - /* - * Note: some old Dells seem to need 4k EBDA without - * reporting so, so just consider the memory above 0x9f000 - * to be off limits (bugzilla 2990). + * BIOS RAM size is encoded in kilobytes, convert it + * to bytes to get a first guess at where the BIOS + * firmware area starts: */ + bios_start = *(unsigned short *)__va(BIOS_RAM_SIZE_KB_PTR); + bios_start <<= 10; - /* If the EBDA address is below 128K, assume it is bogus */ - if (ebda_addr < INSANE_CUTOFF) - ebda_addr = LOWMEM_CAP; + /* + * If bios_start is less than 128K, assume it is bogus + * and bump it up to 640K. Similarly, if bios_start is above 640K, + * don't trust it. + */ + if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) + bios_start = BIOS_START_MAX; - /* If lowmem is less than 128K, assume it is bogus */ - if (lowmem < INSANE_CUTOFF) - lowmem = LOWMEM_CAP; + /* Get the start address of the EBDA page: */ + ebda_start = get_bios_ebda(); - /* Use the lower of the lowmem and EBDA markers as the cutoff */ - lowmem = min(lowmem, ebda_addr); - lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */ + /* + * If the EBDA start address is sane and is below the BIOS region, + * then also reserve everything from the EBDA start address up to + * the BIOS region. 
+ */ + if (ebda_start >= BIOS_START_MIN && ebda_start < bios_start) + bios_start = ebda_start; - /* reserve all memory between lowmem and the 1MB mark */ - memblock_reserve(lowmem, 0x100000 - lowmem); + /* Reserve all memory between bios_start and the 1MB mark: */ + memblock_reserve(bios_start, 0x100000 - bios_start); } diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 4d38416e2a7f..04f89caef9c4 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -57,7 +57,7 @@ # error "Need more than one PGD for the ESPFIX hack" #endif -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) /* This contains the *bottom* address of the espfix stack */ DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 97027545a72d..3fc03a09a93b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -8,10 +8,14 @@ #include #include #include +#include #include #include +#define CREATE_TRACE_POINTS +#include + /* * Represents the initial FPU state. It's mostly (but not completely) zeroes, * depending on the FPU hardware format: @@ -192,6 +196,7 @@ void fpu__save(struct fpu *fpu) WARN_ON_FPU(fpu != ¤t->thread.fpu); preempt_disable(); + trace_x86_fpu_before_save(fpu); if (fpu->fpregs_active) { if (!copy_fpregs_to_fpstate(fpu)) { if (use_eager_fpu()) @@ -200,6 +205,7 @@ void fpu__save(struct fpu *fpu) fpregs_deactivate(fpu); } } + trace_x86_fpu_after_save(fpu); preempt_enable(); } EXPORT_SYMBOL_GPL(fpu__save); @@ -222,7 +228,14 @@ void fpstate_init(union fpregs_state *state) return; } - memset(state, 0, xstate_size); + memset(state, 0, fpu_kernel_xstate_size); + + /* + * XRSTORS requires that this bit is set in xcomp_bv, or + * it will #GP. Make sure it is replaced after the memset(). 
+ */ + if (static_cpu_has(X86_FEATURE_XSAVES)) + state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT; if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); @@ -247,7 +260,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * leak into the child task: */ if (use_eager_fpu()) - memset(&dst_fpu->state.xsave, 0, xstate_size); + memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); /* * Save current FPU registers directly into the child @@ -266,7 +279,8 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) */ preempt_disable(); if (!copy_fpregs_to_fpstate(dst_fpu)) { - memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); + memcpy(&src_fpu->state, &dst_fpu->state, + fpu_kernel_xstate_size); if (use_eager_fpu()) copy_kernel_to_fpregs(&src_fpu->state); @@ -275,6 +289,9 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) } preempt_enable(); + trace_x86_fpu_copy_src(src_fpu); + trace_x86_fpu_copy_dst(dst_fpu); + return 0; } @@ -288,7 +305,9 @@ void fpu__activate_curr(struct fpu *fpu) if (!fpu->fpstate_active) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for the current task: */ fpu->fpstate_active = 1; } @@ -314,7 +333,9 @@ void fpu__activate_fpstate_read(struct fpu *fpu) } else { if (!fpu->fpstate_active) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for current and for stopped child tasks: */ fpu->fpstate_active = 1; } @@ -347,7 +368,9 @@ void fpu__activate_fpstate_write(struct fpu *fpu) fpu->last_cpu = -1; } else { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for stopped child tasks: */ fpu->fpstate_active = 1; } @@ -432,9 +455,11 @@ void fpu__restore(struct fpu *fpu) /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); + trace_x86_fpu_before_restore(fpu); fpregs_activate(fpu); copy_kernel_to_fpregs(&fpu->state); fpu->counter++; + trace_x86_fpu_after_restore(fpu); kernel_fpu_enable(); } EXPORT_SYMBOL_GPL(fpu__restore); @@ -463,6 +488,8 @@ void fpu__drop(struct fpu *fpu) fpu->fpstate_active = 0; + trace_x86_fpu_dropped(fpu); + preempt_enable(); } diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index aacfd7a82cec..93982aebb398 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -145,8 +145,8 @@ static void __init fpu__init_system_generic(void) * This is inherent to the XSAVE architecture which puts all state * components into a single, continuous memory block: */ -unsigned int xstate_size; -EXPORT_SYMBOL_GPL(xstate_size); +unsigned int fpu_kernel_xstate_size; +EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size); /* Get alignment of the TYPE. */ #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) @@ -178,7 +178,7 @@ static void __init fpu__init_task_struct_size(void) * Add back the dynamically-calculated register state * size. */ - task_size += xstate_size; + task_size += fpu_kernel_xstate_size; /* * We dynamically size 'struct fpu', so we require that @@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void) } /* - * Set up the xstate_size based on the legacy FPU context size. + * Set up the user and kernel xstate sizes based on the legacy FPU context size. * * We set this up first, and later it will be overwritten by * fpu__init_system_xstate() if the CPU knows about xstates. 
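The xcomp_bv detail above is worth a second look: when XSAVES is in use, XRSTORS raises #GP unless bit 63 of xcomp_bv (the compacted-format bit) is set, so fpstate_init() must restore that bit after zeroing the buffer. A minimal sketch of the invariant, using a simplified header layout as a stand-in for the kernel's struct xstate_header:

#include <stdint.h>
#include <string.h>

#define XCOMP_BV_COMPACTED_FORMAT (1ULL << 63)	/* bit 63, as in the patch */

struct xstate_header_sketch {	/* simplified stand-in for struct xstate_header */
	uint64_t xfeatures;
	uint64_t xcomp_bv;
	uint64_t reserved[6];
};

/*
 * Zero the header, then restore the compacted-format bit so that a later
 * XRSTORS of this buffer does not #GP.
 */
static void header_init(struct xstate_header_sketch *hdr, int have_xsaves)
{
	memset(hdr, 0, sizeof(*hdr));
	if (have_xsaves)
		hdr->xcomp_bv = XCOMP_BV_COMPACTED_FORMAT;
}

The same invariant explains why setup_init_fpu_buf() further down ORs (u64)1 << 63 into init_fpstate's xcomp_bv together with xfeatures_mask.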
@@ -208,7 +208,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) on_boot_cpu = 0; /* - * Note that xstate_size might be overwriten later during + * Note that xstate sizes might be overwritten later during * fpu__init_system_xstate(). */ @@ -219,27 +219,17 @@ static void __init fpu__init_system_xstate_size_legacy(void) */ setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - xstate_size = sizeof(struct swregs_state); + fpu_kernel_xstate_size = sizeof(struct swregs_state); } else { if (boot_cpu_has(X86_FEATURE_FXSR)) - xstate_size = sizeof(struct fxregs_state); + fpu_kernel_xstate_size = + sizeof(struct fxregs_state); else - xstate_size = sizeof(struct fregs_state); + fpu_kernel_xstate_size = + sizeof(struct fregs_state); } - /* - * Quirk: we don't yet handle the XSAVES* instructions - * correctly, as we don't correctly convert between - * standard and compacted format when interfacing - * with user-space - so disable it for now. - * - * The difference is small: with recent CPUs the - * compacted format is only marginally smaller than - * the standard FPU state format. - * - * ( This is easy to backport while we are fixing - * XSAVES* support. ) - */ - setup_clear_cpu_cap(X86_FEATURE_XSAVES); + + fpu_user_xstate_size = fpu_kernel_xstate_size; } /* diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 81422dfb152b..c114b132d121 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -4,6 +4,7 @@ #include #include #include +#include /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, @@ -85,21 +86,26 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; - fpu__activate_fpstate_read(fpu); - xsave = &fpu->state.xsave; - /* - * Copy the 48bytes defined by the software first into the xstate - * memory layout in the thread struct, so that we can copy the entire - * xstateregs to the user using one user_regset_copyout(). - */ - memcpy(&xsave->i387.sw_reserved, - xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); - /* - * Copy the xstate memory layout. - */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + fpu__activate_fpstate_read(fpu); + + if (using_compacted_format()) { + ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave); + } else { + fpstate_sanitize_xstate(fpu); + /* + * Copy the 48 bytes defined by the software into the xsave + * area in the thread struct, so that we can copy the whole + * area to user using one user_regset_copyout(). + */ + memcpy(&xsave->i387.sw_reserved, xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); + + /* + * Copy the xstate memory layout. 
+ */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + } return ret; } @@ -114,11 +120,27 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; - fpu__activate_fpstate_write(fpu); + /* + * A whole standard-format XSAVE buffer is needed: + */ + if ((pos != 0) || (count < fpu_user_xstate_size)) + return -EFAULT; xsave = &fpu->state.xsave; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + fpu__activate_fpstate_write(fpu); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + ret = copyin_to_xsaves(kbuf, ubuf, xsave); + else + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + + /* + * In case of failure, mark all states as init: + */ + if (ret) + fpstate_init(&fpu->state); + /* * mxcsr reserved bits must be masked to zero for security reasons. */ diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 31c6a60505e6..9e231d88bb33 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -8,8 +8,10 @@ #include #include #include +#include #include +#include static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; @@ -31,7 +33,7 @@ static inline int check_for_xstate(struct fxregs_state __user *buf, /* Check for the first magic field and other error scenarios. */ if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || fx_sw->xstate_size < min_xstate_size || - fx_sw->xstate_size > xstate_size || + fx_sw->xstate_size > fpu_user_xstate_size || fx_sw->xstate_size > fx_sw->extended_size) return -1; @@ -88,7 +90,8 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) if (!use_xsave()) return err; - err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 *)(buf + fpu_user_xstate_size)); /* * Read the xfeatures which we copied (directly from the cpu or @@ -125,7 +128,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) else err = copy_fregs_to_user((struct fregs_state __user *) buf); - if (unlikely(err) && __clear_user(buf, xstate_size)) + if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size)) err = -EFAULT; return err; } @@ -167,7 +170,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - if (fpregs_active()) { + if (fpregs_active() || using_compacted_format()) { /* Save the live register state to the user directly. */ if (copy_fpregs_to_sigframe(buf_fx)) return -1; @@ -175,8 +178,19 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) if (ia32_fxstate) copy_fxregs_to_kernel(&tsk->thread.fpu); } else { + /* + * It is a *bug* if kernel uses compacted-format for xsave + * area and we copy it out directly to a signal frame. It + * should have been handled above by saving the registers + * directly. 
+ */ + if (boot_cpu_has(X86_FEATURE_XSAVES)) { + WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n"); + return -1; + } + fpstate_sanitize_xstate(&tsk->thread.fpu); - if (__copy_to_user(buf_fx, xsave, xstate_size)) + if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) return -1; } @@ -250,7 +264,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; - int state_size = xstate_size; + int state_size = fpu_kernel_xstate_size; u64 xfeatures = 0; int fx_only = 0; @@ -282,6 +296,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ state_size = sizeof(struct fxregs_state); fx_only = 1; + trace_x86_fpu_xstate_check_failed(fpu); } else { state_size = fx_sw_user.xstate_size; xfeatures = fx_sw_user.xfeatures; @@ -308,9 +323,17 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ fpu__drop(fpu); - if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { + if (using_compacted_format()) { + err = copyin_to_xsaves(NULL, buf_fx, + &fpu->state.xsave); + } else { + err = __copy_from_user(&fpu->state.xsave, + buf_fx, state_size); + } + + if (err || __copy_from_user(&env, buf, sizeof(env))) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); err = -1; } else { sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); @@ -341,7 +364,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) static inline int xstate_sigframe_size(void) { - return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; + return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE : + fpu_user_xstate_size; } /* @@ -385,12 +409,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, */ void fpu__init_prepare_fx_sw_frame(void) { - int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; + int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE; fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = size; fx_sw_reserved.xfeatures = xfeatures_mask; - fx_sw_reserved.xstate_size = xstate_size; + fx_sw_reserved.xstate_size = fpu_user_xstate_size; if (config_enabled(CONFIG_IA32_EMULATION) || config_enabled(CONFIG_X86_32)) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 4ea2a59483c7..680049aa4593 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -43,6 +44,13 @@ static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; +/* + * The XSAVE area of kernel can be in standard or compacted format; + * it is always in standard format for user mode. This is the user + * mode standard format size used for signal and ptrace frames. + */ +unsigned int fpu_user_xstate_size; + /* * Clear all of the X86_FEATURE_* bits that are unavailable * when the CPU has no XSAVE support. @@ -105,6 +113,27 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) } EXPORT_SYMBOL_GPL(cpu_has_xfeatures); +static int xfeature_is_supervisor(int xfeature_nr) +{ + /* + * We currently do not support supervisor states, but if + * we did, we could find out like this. 
+ * + * SDM says: If state component 'i' is a user state component, + * ECX[0] return 0; if state component i is a supervisor + * state component, ECX[0] returns 1. + */ + u32 eax, ebx, ecx, edx; + + cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); + return !!(ecx & 1); +} + +static int xfeature_is_user(int xfeature_nr) +{ + return !xfeature_is_supervisor(xfeature_nr); +} + /* * When executing XSAVEOPT (or other optimized XSAVE instructions), if * a processor implementation detects that an FPU state component is still @@ -171,7 +200,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) */ while (xfeatures) { if (xfeatures & 0x1) { - int offset = xstate_offsets[feature_bit]; + int offset = xstate_comp_offsets[feature_bit]; int size = xstate_sizes[feature_bit]; memcpy((void *)fx + offset, @@ -192,6 +221,15 @@ void fpu__init_cpu_xstate(void) { if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) return; + /* + * Make it clear that XSAVES supervisor states are not yet + * implemented should anyone expect it to work by changing + * bits in XFEATURE_MASK_* macros and XCR0. + */ + WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR), + "x86/fpu: XSAVES supervisor states are not yet implemented.\n"); + + xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR; cr4_set_bits(X86_CR4_OSXSAVE); xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); @@ -217,13 +255,29 @@ static void __init setup_xstate_features(void) /* start at the beginnning of the "extended state" */ unsigned int last_good_offset = offsetof(struct xregs_state, extended_state_area); + /* + * The FP xstates and SSE xstates are legacy states. They are always + * in the fixed offsets in the xsave area in either compacted form + * or standard form. + */ + xstate_offsets[0] = 0; + xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space); + xstate_offsets[1] = xstate_sizes[0]; + xstate_sizes[1] = FIELD_SIZEOF(struct fxregs_state, xmm_space); for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { if (!xfeature_enabled(i)) continue; cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); - xstate_offsets[i] = ebx; + + /* + * If an xfeature is supervisor state, the offset + * in EBX is invalid. We leave it to -1. + */ + if (xfeature_is_user(i)) + xstate_offsets[i] = ebx; + xstate_sizes[i] = eax; /* * In our xstate size checks, we assume that the @@ -233,8 +287,6 @@ static void __init setup_xstate_features(void) WARN_ONCE(last_good_offset > xstate_offsets[i], "x86/fpu: misordered xstate at %d\n", last_good_offset); last_good_offset = xstate_offsets[i]; - - printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, ebx, i, eax); } } @@ -262,6 +314,33 @@ static void __init print_xstate_features(void) print_xstate_feature(XFEATURE_MASK_PKRU); } +/* + * This check is important because it is easy to get XSTATE_* + * confused with XSTATE_BIT_*. + */ +#define CHECK_XFEATURE(nr) do { \ + WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ + WARN_ON(nr >= XFEATURE_MAX); \ +} while (0) + +/* + * We could cache this like xstate_size[], but we only use + * it here, so it would be a waste of space. 
+ */ +static int xfeature_is_aligned(int xfeature_nr) +{ + u32 eax, ebx, ecx, edx; + + CHECK_XFEATURE(xfeature_nr); + cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); + /* + * The value returned by ECX[1] indicates the alignment + * of state component 'i' when the compacted format + * of the extended region of an XSAVE area is used: + */ + return !!(ecx & 2); +} + /* * This function sets up offsets and sizes of all extended states in * xsave area. This supports both standard format and compacted format @@ -299,10 +378,29 @@ static void __init setup_xstate_comp(void) else xstate_comp_sizes[i] = 0; - if (i > FIRST_EXTENDED_XFEATURE) + if (i > FIRST_EXTENDED_XFEATURE) { xstate_comp_offsets[i] = xstate_comp_offsets[i-1] + xstate_comp_sizes[i-1]; + if (xfeature_is_aligned(i)) + xstate_comp_offsets[i] = + ALIGN(xstate_comp_offsets[i], 64); + } + } +} + +/* + * Print out xstate component offsets and sizes + */ +static void __init print_xstate_offset_size(void) +{ + int i; + + for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { + if (!xfeature_enabled(i)) + continue; + pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", + i, xstate_comp_offsets[i], i, xstate_sizes[i]); } } @@ -322,13 +420,11 @@ static void __init setup_init_fpu_buf(void) setup_xstate_features(); print_xstate_features(); - if (boot_cpu_has(X86_FEATURE_XSAVES)) { + if (boot_cpu_has(X86_FEATURE_XSAVES)) init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; - init_fpstate.xsave.header.xfeatures = xfeatures_mask; - } /* - * Init all the features state with header_bv being 0x0 + * Init all the features state with header.xfeatures being 0x0 */ copy_kernel_to_xregs_booting(&init_fpstate.xsave); @@ -339,58 +435,19 @@ static void __init setup_init_fpu_buf(void) copy_xregs_to_kernel_booting(&init_fpstate.xsave); } -static int xfeature_is_supervisor(int xfeature_nr) -{ - /* - * We currently do not support supervisor states, but if - * we did, we could find out like this. - * - * SDM says: If state component i is a user state component, - * ECX[0] return 0; if state component i is a supervisor - * state component, ECX[0] returns 1. - u32 eax, ebx, ecx, edx; - cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx; - return !!(ecx & 1); - */ - return 0; -} -/* -static int xfeature_is_user(int xfeature_nr) -{ - return !xfeature_is_supervisor(xfeature_nr); -} -*/ - -/* - * This check is important because it is easy to get XSTATE_* - * confused with XSTATE_BIT_*. - */ -#define CHECK_XFEATURE(nr) do { \ - WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ - WARN_ON(nr >= XFEATURE_MAX); \ -} while (0) - -/* - * We could cache this like xstate_size[], but we only use - * it here, so it would be a waste of space. - */ -static int xfeature_is_aligned(int xfeature_nr) +static int xfeature_uncompacted_offset(int xfeature_nr) { u32 eax, ebx, ecx, edx; - CHECK_XFEATURE(xfeature_nr); - cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); /* - * The value returned by ECX[1] indicates the alignment - * of state component i when the compacted format - * of the extended region of an XSAVE area is used + * Only XSAVES supports supervisor states and it uses compacted + * format. Checking a supervisor state's uncompacted offset is + * an error. 
*/ - return !!(ecx & 2); -} - -static int xfeature_uncompacted_offset(int xfeature_nr) -{ - u32 eax, ebx, ecx, edx; + if (XFEATURE_MASK_SUPERVISOR & (1 << xfeature_nr)) { + WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr); + return -1; + } CHECK_XFEATURE(xfeature_nr); cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); @@ -415,7 +472,7 @@ static int xfeature_size(int xfeature_nr) * that it is obvious which aspect of 'XSAVES' is being handled * by the calling code. */ -static int using_compacted_format(void) +int using_compacted_format(void) { return boot_cpu_has(X86_FEATURE_XSAVES); } @@ -530,11 +587,12 @@ static void do_extra_xstate_size_checks(void) */ paranoid_xstate_size += xfeature_size(i); } - XSTATE_WARN_ON(paranoid_xstate_size != xstate_size); + XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size); } + /* - * Calculate total size of enabled xstates in XCR0/xfeatures_mask. + * Get total size of enabled xstates in XCR0/xfeatures_mask. * * Note the SDM's wording here. "sub-function 0" only enumerates * the size of the *user* states. If we use it to size a buffer @@ -544,34 +602,33 @@ static void do_extra_xstate_size_checks(void) * Note that we do not currently set any bits on IA32_XSS so * 'XCR0 | IA32_XSS == XCR0' for now. */ -static unsigned int __init calculate_xstate_size(void) +static unsigned int __init get_xsaves_size(void) { unsigned int eax, ebx, ecx, edx; - unsigned int calculated_xstate_size; + /* + * - CPUID function 0DH, sub-function 1: + * EBX enumerates the size (in bytes) required by + * the XSAVES instruction for an XSAVE area + * containing all the state components + * corresponding to bits currently set in + * XCR0 | IA32_XSS. + */ + cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); + return ebx; +} - if (!boot_cpu_has(X86_FEATURE_XSAVES)) { - /* - * - CPUID function 0DH, sub-function 0: - * EBX enumerates the size (in bytes) required by - * the XSAVE instruction for an XSAVE area - * containing all the *user* state components - * corresponding to bits currently set in XCR0. - */ - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - calculated_xstate_size = ebx; - } else { - /* - * - CPUID function 0DH, sub-function 1: - * EBX enumerates the size (in bytes) required by - * the XSAVES instruction for an XSAVE area - * containing all the state components - * corresponding to bits currently set in - * XCR0 | IA32_XSS. - */ - cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); - calculated_xstate_size = ebx; - } - return calculated_xstate_size; +static unsigned int __init get_xsave_size(void) +{ + unsigned int eax, ebx, ecx, edx; + /* + * - CPUID function 0DH, sub-function 0: + * EBX enumerates the size (in bytes) required by + * the XSAVE instruction for an XSAVE area + * containing all the *user* state components + * corresponding to bits currently set in XCR0. 
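Both sizing queries described in the comments above are visible from user space too; a sketch, again assuming GCC's <cpuid.h>:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* Sub-leaf 0: XSAVE size for the user states in XCR0. */
            __get_cpuid_count(0x0d, 0, &eax, &ebx, &ecx, &edx);
            printf("XSAVE  size (XCR0):          %u bytes\n", ebx);

            /* Sub-leaf 1: XSAVES size for XCR0 | IA32_XSS (compacted). */
            __get_cpuid_count(0x0d, 1, &eax, &ebx, &ecx, &edx);
            printf("XSAVES size (XCR0|IA32_XSS): %u bytes\n", ebx);
            return 0;
    }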
+ */ + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + return ebx; } /* @@ -591,7 +648,15 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size) static int init_xstate_size(void) { /* Recompute the context size for enabled features: */ - unsigned int possible_xstate_size = calculate_xstate_size(); + unsigned int possible_xstate_size; + unsigned int xsave_size; + + xsave_size = get_xsave_size(); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + possible_xstate_size = get_xsaves_size(); + else + possible_xstate_size = xsave_size; /* Ensure we have the space to store all enabled: */ if (!is_supported_xstate_size(possible_xstate_size)) @@ -601,8 +666,13 @@ static int init_xstate_size(void) * The size is OK, we are definitely going to use xsave, * make it known to the world that we need more space. */ - xstate_size = possible_xstate_size; + fpu_kernel_xstate_size = possible_xstate_size; do_extra_xstate_size_checks(); + + /* + * User space is always in standard format. + */ + fpu_user_xstate_size = xsave_size; return 0; } @@ -644,8 +714,13 @@ void __init fpu__init_system_xstate(void) xfeatures_mask = eax + ((u64)edx << 32); if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { + /* + * This indicates that something really unexpected happened + * with the enumeration. Disable XSAVE and try to continue + * booting without it. This is too early to BUG(). + */ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); - BUG(); + goto out_disable; } xfeatures_mask &= fpu__get_supported_xfeatures_mask(); @@ -653,21 +728,29 @@ void __init fpu__init_system_xstate(void) /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); err = init_xstate_size(); - if (err) { - /* something went wrong, boot without any XSAVE support */ - fpu__init_disable_system_xstate(); - return; - } + if (err) + goto out_disable; + + /* + * Update info used for ptrace frames; use standard-format size and no + * supervisor xstates: + */ + update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR); - update_regset_xstate_info(xstate_size, xfeatures_mask); fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp(); + print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", xfeatures_mask, - xstate_size, + fpu_kernel_xstate_size, boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); + return; + +out_disable: + /* something went wrong, try to boot without any XSAVE support */ + fpu__init_disable_system_xstate(); } /* @@ -693,6 +776,11 @@ void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) { int feature_nr = fls64(xstate_feature_mask) - 1; + if (!xfeature_enabled(feature_nr)) { + WARN_ON_FPU(1); + return NULL; + } + return (void *)xsave + xstate_comp_offsets[feature_nr]; } /* @@ -887,16 +975,16 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, if (!boot_cpu_has(X86_FEATURE_OSPKE)) return -EINVAL; - /* Set the bits we need in PKRU */ + /* Set the bits we need in PKRU: */ if (init_val & PKEY_DISABLE_ACCESS) new_pkru_bits |= PKRU_AD_BIT; if (init_val & PKEY_DISABLE_WRITE) new_pkru_bits |= PKRU_WD_BIT; - /* Shift the bits in to the correct place in PKRU for pkey. 
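The PKRU update in arch_set_user_pkey_access() above boils down to two bits per protection key; a distilled sketch of that bit manipulation (constants mirror PKRU_AD_BIT/PKRU_WD_BIT):

    #include <stdio.h>

    #define PKRU_AD_BIT 0x1u   /* access-disable */
    #define PKRU_WD_BIT 0x2u   /* write-disable  */

    static unsigned int pkru_set(unsigned int pkru, int pkey, int ad, int wd)
    {
            int shift = pkey * 2;   /* two bits per protection key */
            unsigned int bits = (ad ? PKRU_AD_BIT : 0) |
                                (wd ? PKRU_WD_BIT : 0);

            /* Mask off any old bits in place, then set the new ones: */
            pkru &= ~((PKRU_AD_BIT | PKRU_WD_BIT) << shift);
            return pkru | (bits << shift);
    }

    int main(void)
    {
            /* Write-disable pkey 5 in an all-zero PKRU: */
            printf("PKRU = 0x%x\n", pkru_set(0, 5, 0, 1));
            return 0;
    }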
*/ + /* Shift the bits in to the correct place in PKRU for pkey: */ new_pkru_bits <<= pkey_shift; - /* Locate old copy of the state in the xsave buffer */ + /* Locate old copy of the state in the xsave buffer: */ old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU); /* @@ -909,9 +997,10 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, else new_pkru_state.pkru = old_pkru_state->pkru; - /* mask off any old bits in place */ + /* Mask off any old bits in place: */ new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); - /* Set the newly-requested bits */ + + /* Set the newly-requested bits: */ new_pkru_state.pkru |= new_pkru_bits; /* @@ -925,8 +1014,168 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, */ new_pkru_state.pad = 0; - fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, - sizeof(new_pkru_state)); + fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state)); + + return 0; +} + +/* + * This is similar to user_regset_copyout(), but will not add offset to + * the source data pointer or increment pos, count, kbuf, and ubuf. + */ +static inline int xstate_copyout(unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf, + const void *data, const int start_pos, + const int end_pos) +{ + if ((count == 0) || (pos < start_pos)) + return 0; + + if (end_pos < 0 || pos < end_pos) { + unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos)); + + if (kbuf) { + memcpy(kbuf + pos, data, copy); + } else { + if (__copy_to_user(ubuf + pos, data, copy)) + return -EFAULT; + } + } + return 0; +} + +/* + * Convert from kernel XSAVES compacted format to standard format and copy + * to a ptrace buffer. It supports partial copy but pos always starts from + * zero. This is called from xstateregs_get() and there we check the CPU + * has XSAVES. + */ +int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, + void __user *ubuf, struct xregs_state *xsave) +{ + unsigned int offset, size; + int ret, i; + struct xstate_header header; + + /* + * Currently copy_regset_to_user() starts from pos 0: + */ + if (unlikely(pos != 0)) + return -EFAULT; + + /* + * The destination is a ptrace buffer; we put in only user xstates: + */ + memset(&header, 0, sizeof(header)); + header.xfeatures = xsave->header.xfeatures; + header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; + + /* + * Copy xregs_state->header: + */ + offset = offsetof(struct xregs_state, header); + size = sizeof(header); + + ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count); + + if (ret) + return ret; + + for (i = 0; i < XFEATURE_MAX; i++) { + /* + * Copy only in-use xstates: + */ + if ((header.xfeatures >> i) & 1) { + void *src = __raw_xsave_addr(xsave, 1 << i); + + offset = xstate_offsets[i]; + size = xstate_sizes[i]; + + ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count); + + if (ret) + return ret; + + if (offset + size >= count) + break; + } + + } + + /* + * Fill xsave->i387.sw_reserved value for ptrace frame: + */ + offset = offsetof(struct fxregs_state, sw_reserved); + size = sizeof(xstate_fx_sw_bytes); + + ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count); + + if (ret) + return ret; + + return 0; +} + +/* + * Convert from a ptrace standard-format buffer to kernel XSAVES format + * and copy to the target thread. This is called from xstateregs_set() and + * there we check the CPU has XSAVES and a whole standard-sized buffer + * exists. 
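At its core, the format conversion performed by copyout_from_xsaves() above is a per-feature memcpy between two offset tables; a minimal sketch (the arrays stand in for xstate_comp_offsets[], xstate_offsets[] and xstate_sizes[]; the reverse direction simply swaps the memcpy operands):

    #include <string.h>

    static void compacted_to_standard(void *dst, const void *src,
                                      unsigned long long xfeatures,
                                      const unsigned int *comp_off,
                                      const unsigned int *std_off,
                                      const unsigned int *size, int nmax)
    {
            int i;

            for (i = 0; i < nmax; i++)
                    if ((xfeatures >> i) & 1)   /* copy only in-use states */
                            memcpy((char *)dst + std_off[i],
                                   (const char *)src + comp_off[i],
                                   size[i]);
    }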
+ */ +int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, + struct xregs_state *xsave) +{ + unsigned int offset, size; + int i; + u64 xfeatures; + u64 allowed_features; + + offset = offsetof(struct xregs_state, header); + size = sizeof(xfeatures); + + if (kbuf) { + memcpy(&xfeatures, kbuf + offset, size); + } else { + if (__copy_from_user(&xfeatures, ubuf + offset, size)) + return -EFAULT; + } + + /* + * Reject if the user sets any disabled or supervisor features: + */ + allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; + + if (xfeatures & ~allowed_features) + return -EINVAL; + + for (i = 0; i < XFEATURE_MAX; i++) { + u64 mask = ((u64)1 << i); + + if (xfeatures & mask) { + void *dst = __raw_xsave_addr(xsave, 1 << i); + + offset = xstate_offsets[i]; + size = xstate_sizes[i]; + + if (kbuf) { + memcpy(dst, kbuf + offset, size); + } else { + if (__copy_from_user(dst, ubuf + offset, size)) + return -EFAULT; + } + } + } + + /* + * The state that came in from userspace was user-state only. + * Mask all the user states out of 'xfeatures': + */ + xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR; + + /* + * Add back in the features that came in from userspace: + */ + xsave->header.xfeatures |= xfeatures; return 0; } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index d784bb547a9d..2dda0bc4576e 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -26,7 +26,7 @@ static void __init i386_default_early_setup(void) x86_init.resources.reserve_resources = i386_reserve_resources; x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; - reserve_ebda_region(); + reserve_bios_regions(); } asmlinkage __visible void __init i386_start_kernel(void) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b72fb0b71dd1..99d48e7d2974 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -183,7 +183,7 @@ void __init x86_64_start_reservations(char *real_mode_data) copy_bootdata(__va(real_mode_data)); x86_early_init_platform_quirks(); - reserve_ebda_region(); + reserve_bios_regions(); switch (boot_params.hdr.hardware_subarch) { case X86_SUBARCH_INTEL_MID: diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 5df831ef1442..9f8efc9f0075 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -38,7 +38,7 @@ #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) +L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) L4_START_KERNEL = pgd_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) @@ -299,6 +299,7 @@ ENTRY(secondary_startup_64) pushq $__KERNEL_CS # set correct cs pushq %rax # target address in negative space lretq +ENDPROC(secondary_startup_64) #include "verify_cpu.S" diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 64341aa485ae..d40ee8a38fed 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -42,3 +42,5 @@ EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(___preempt_schedule); EXPORT_SYMBOL(___preempt_schedule_notrace); #endif + +EXPORT_SYMBOL(__sw_hweight32); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 38da8f29a9c8..c627bf8d98ad 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -130,11 +130,9 @@ void irq_ctx_init(int cpu) void do_softirq_own_stack(void) { - struct thread_info *curstk; struct irq_stack *irqstk; u32 *isp, *prev_esp; - curstk = current_stack(); irqstk = __this_cpu_read(softirq_stack); 
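The header fix-up at the end of copyin_to_xsaves() above is worth a second look: supervisor bits are preserved and only user bits are replaced. A sketch with an illustrative supervisor mask (not the kernel's actual value):

    #include <stdio.h>

    #define MASK_SUPERVISOR 0xe00ull   /* illustrative only */

    int main(void)
    {
            unsigned long long hdr  = 0xa07;  /* current header.xfeatures */
            unsigned long long user = 0x3;    /* states written by ptrace */

            hdr &= MASK_SUPERVISOR;   /* keep supervisor states */
            hdr |= user;              /* add back the user states */
            printf("header.xfeatures = 0x%llx\n", hdr);   /* 0xa03 */
            return 0;
    }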
/* build the stack frame on the softirq stack */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 38cf7a741250..7847e5c0e0b5 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -961,7 +961,19 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * normal page fault. */ regs->ip = (unsigned long)cur->addr; + /* + * Trap flag (TF) has been set here because this fault + * happened where the single stepping will be done. + * So clear it by resetting the current kprobe: + */ + regs->flags &= ~X86_EFLAGS_TF; + + /* + * If the TF flag was set before the kprobe hit, + * don't touch it: + */ regs->flags |= kcb->kprobe_old_flags; + if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index eea2a6f72b31..1ef5e48b3a36 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -301,8 +301,6 @@ static void kvm_register_steal_time(void) if (!has_steal_clock) return; - memset(st, 0, sizeof(*st)); - wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); pr_info("kvm-stealtime: cpu %d, msr %llx\n", cpu, (unsigned long long) slow_virt_to_phys(st)); diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index b2f8a33b36ff..24a50301f150 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -7,12 +7,12 @@ void __init x86_early_init_platform_quirks(void) { x86_platform.legacy.rtc = 1; - x86_platform.legacy.ebda_search = 0; + x86_platform.legacy.reserve_bios_regions = 0; x86_platform.legacy.devices.pnpbios = 1; switch (boot_params.hdr.hardware_subarch) { case X86_SUBARCH_PC: - x86_platform.legacy.ebda_search = 1; + x86_platform.legacy.reserve_bios_regions = 1; break; case X86_SUBARCH_XEN: case X86_SUBARCH_LGUEST: diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 99bfc025111d..06c58ce46762 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -61,11 +61,16 @@ void pvclock_resume(void) u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) { unsigned version; - cycle_t ret; u8 flags; do { - version = __pvclock_read_cycles(src, &ret, &flags); + version = src->version; + /* Make the latest version visible */ + smp_rmb(); + + flags = src->flags; + /* Make sure that the version double-check is last. */ + smp_rmb(); } while ((src->version & 1) || version != src->version); return flags & valid_flags; @@ -80,6 +85,8 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) do { version = __pvclock_read_cycles(src, &ret, &flags); + /* Make sure that the version double-check is last. */ + smp_rmb(); } while ((src->version & 1) || version != src->version); if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index a9b31eb815f2..15ed70f8278b 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -54,6 +54,19 @@ bool port_cf9_safe = false; * Dell Inc. so their systems "just work". :-) */ +/* + * Some machines require the "reboot=a" commandline options + */ +static int __init set_acpi_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_ACPI) { + reboot_type = BOOT_ACPI; + pr_info("%s series board detected. Selecting %s-method for reboots.\n", + d->ident, "ACPI"); + } + return 0; +} + /* * Some machines require the "reboot=b" or "reboot=k" commandline options, * this quirk makes that automatic. 
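The retry loop in the pvclock_read_flags() hunk above follows the usual seqcount-style version protocol; a generic reader-side sketch (smp_rmb() is modeled with a plain compiler barrier, which suffices on x86 for this pattern):

    #define barrier() __asm__ __volatile__("" ::: "memory")

    struct timeinfo {
            volatile unsigned int version;
            unsigned int flags;
    };

    static unsigned int read_flags(const struct timeinfo *src)
    {
            unsigned int version, flags;

            do {
                    version = src->version;
                    barrier();      /* read version before the payload */
                    flags = src->flags;
                    barrier();      /* make the version re-check last  */
            } while ((src->version & 1) || version != src->version);

            return flags;
    }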
@@ -395,6 +408,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), }, }, + { /* Handle problems with rebooting on Dell Optiplex 7450 AIO */ + .callback = set_acpi_reboot, + .ident = "Dell OptiPlex 7450 AIO", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 7450 AIO"), + }, + }, /* Hewlett-Packard */ { /* Handle problems with rebooting on HP laptops */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c4e7b3991b60..6cb2b02fcc87 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -113,6 +113,7 @@ #include #include #include +#include /* * max_low_pfn_mapped: highest direct mapped pfn under 4GB @@ -399,10 +400,6 @@ static void __init reserve_initrd(void) memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); } -static void __init early_initrd_acpi_init(void) -{ - early_acpi_table_init((void *)initrd_start, initrd_end - initrd_start); -} #else static void __init early_reserve_initrd(void) { @@ -410,9 +407,6 @@ static void __init early_reserve_initrd(void) static void __init reserve_initrd(void) { } -static void __init early_initrd_acpi_init(void) -{ -} #endif /* CONFIG_BLK_DEV_INITRD */ static void __init parse_setup_data(void) @@ -942,6 +936,8 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); + kernel_randomize_memory(); + iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; setup_memory_map(); parse_setup_data(); @@ -1146,7 +1142,7 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); - early_initrd_acpi_init(); + acpi_table_upgrade(); vsmp_init(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index e4fcb87ba7a6..7a40e068302d 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -236,6 +236,8 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = early_per_cpu_map(x86_bios_cpu_apicid, cpu); + per_cpu(x86_cpu_to_acpiid, cpu) = + early_per_cpu_map(x86_cpu_to_acpiid, cpu); #endif #ifdef CONFIG_X86_32 per_cpu(x86_cpu_to_logical_apicid, cpu) = @@ -271,6 +273,7 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_X86_LOCAL_APIC early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; + early_per_cpu_ptr(x86_cpu_to_acpiid) = NULL; #endif #ifdef CONFIG_X86_32 early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL; diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index dc3c0b1c816f..b44564bf86a8 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -1,11 +1,104 @@ #include #include +/* + * The compat_siginfo_t structure and handing code is very easy + * to break in several ways. It must always be updated when new + * updates are made to the main siginfo_t, and + * copy_siginfo_to_user32() must be updated when the + * (arch-independent) copy_siginfo_to_user() is updated. + * + * It is also easy to put a new member in the compat_siginfo_t + * which has implicit alignment which can move internal structure + * alignment around breaking the ABI. This can happen if you, + * for instance, put a plain 64-bit value in there. + */ +static inline void signal_compat_build_tests(void) +{ + int _sifields_offset = offsetof(compat_siginfo_t, _sifields); + + /* + * If adding a new si_code, there is probably new data in + * the siginfo. Make sure folks bumping the si_code + * limits also have to look at this code. 
Make sure any + * new fields are handled in copy_siginfo_to_user32()! + */ + BUILD_BUG_ON(NSIGILL != 8); + BUILD_BUG_ON(NSIGFPE != 8); + BUILD_BUG_ON(NSIGSEGV != 4); + BUILD_BUG_ON(NSIGBUS != 5); + BUILD_BUG_ON(NSIGTRAP != 4); + BUILD_BUG_ON(NSIGCHLD != 6); + BUILD_BUG_ON(NSIGSYS != 1); + + /* This is part of the ABI and can never change in size: */ + BUILD_BUG_ON(sizeof(compat_siginfo_t) != 128); + /* + * The offsets of all the (unioned) si_fields are fixed + * in the ABI, of course. Make sure none of them ever + * move and are always at the beginning: + */ + BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int)); +#define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name)) + + /* + * Ensure that the size of each si_field never changes. + * If it does, it is a sign that the + * copy_siginfo_to_user32() code below needs to updated + * along with the size in the CHECK_SI_SIZE(). + * + * We repeat this check for both the generic and compat + * siginfos. + * + * Note: it is OK for these to grow as long as the whole + * structure stays within the padding size (checked + * above). + */ +#define CHECK_CSI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((compat_siginfo_t *)0)->_sifields.name)) +#define CHECK_SI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((siginfo_t *)0)->_sifields.name)) + + CHECK_CSI_OFFSET(_kill); + CHECK_CSI_SIZE (_kill, 2*sizeof(int)); + CHECK_SI_SIZE (_kill, 2*sizeof(int)); + + CHECK_CSI_OFFSET(_timer); + CHECK_CSI_SIZE (_timer, 5*sizeof(int)); + CHECK_SI_SIZE (_timer, 6*sizeof(int)); + + CHECK_CSI_OFFSET(_rt); + CHECK_CSI_SIZE (_rt, 3*sizeof(int)); + CHECK_SI_SIZE (_rt, 4*sizeof(int)); + + CHECK_CSI_OFFSET(_sigchld); + CHECK_CSI_SIZE (_sigchld, 5*sizeof(int)); + CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); + + CHECK_CSI_OFFSET(_sigchld_x32); + CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int)); + /* no _sigchld_x32 in the generic siginfo_t */ + + CHECK_CSI_OFFSET(_sigfault); + CHECK_CSI_SIZE (_sigfault, 4*sizeof(int)); + CHECK_SI_SIZE (_sigfault, 8*sizeof(int)); + + CHECK_CSI_OFFSET(_sigpoll); + CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); + CHECK_SI_SIZE (_sigpoll, 4*sizeof(int)); + + CHECK_CSI_OFFSET(_sigsys); + CHECK_CSI_SIZE (_sigsys, 3*sizeof(int)); + CHECK_SI_SIZE (_sigsys, 4*sizeof(int)); + + /* any new si_fields should be added here */ +} + int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err = 0; bool ia32 = test_thread_flag(TIF_IA32); + signal_compat_build_tests(); + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; @@ -32,6 +125,21 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) &to->_sifields._pad[0]); switch (from->si_code >> 16) { case __SI_FAULT >> 16: + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || + from->si_code == BUS_MCEERR_AO)) + put_user_ex(from->si_addr_lsb, &to->si_addr_lsb); + + if (from->si_signo == SIGSEGV) { + if (from->si_code == SEGV_BNDERR) { + compat_uptr_t lower = (unsigned long)&to->si_lower; + compat_uptr_t upper = (unsigned long)&to->si_upper; + put_user_ex(lower, &to->si_lower); + put_user_ex(upper, &to->si_upper); + } + if (from->si_code == SEGV_PKUERR) + put_user_ex(from->si_pkey, &to->si_pkey); + } break; case __SI_SYS >> 16: put_user_ex(from->si_syscall, &to->si_syscall); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b923cac..c93609c97406 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -105,6 +105,9 @@ static 
unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); +/* Maximum number of SMT threads on any online core */ +int __max_smt_threads __read_mostly; + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; @@ -493,7 +496,7 @@ void set_cpu_sibling_map(int cpu) bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; - int i; + int i, threads; cpumask_set_cpu(cpu, cpu_sibling_setup_mask); @@ -550,6 +553,10 @@ void set_cpu_sibling_map(int cpu) if (match_die(c, o) && !topology_same_node(c, o)) primarily_use_numa_for_topology(); } + + threads = cpumask_weight(topology_sibling_cpumask(cpu)); + if (threads > __max_smt_threads) + __max_smt_threads = threads; } /* maps the cpu to the sched domain representing multi-core */ @@ -1285,7 +1292,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) cpumask_copy(cpu_callin_mask, cpumask_of(0)); mb(); - current_thread_info()->cpu = 0; /* needed? */ for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); @@ -1441,6 +1447,21 @@ __init void prefill_possible_map(void) #ifdef CONFIG_HOTPLUG_CPU +/* Recompute SMT state for all CPUs on offline */ +static void recompute_smt_state(void) +{ + int max_threads, cpu; + + max_threads = 0; + for_each_online_cpu (cpu) { + int threads = cpumask_weight(topology_sibling_cpumask(cpu)); + + if (threads > max_threads) + max_threads = threads; + } + __max_smt_threads = max_threads; +} + static void remove_siblinginfo(int cpu) { int sibling; @@ -1465,6 +1486,7 @@ static void remove_siblinginfo(int cpu) c->phys_proc_id = 0; c->cpu_core_id = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); + recompute_smt_state(); } static void remove_cpu_from_maps(int cpu) @@ -1622,7 +1644,7 @@ static inline void mwait_play_dead(void) } } -static inline void hlt_play_dead(void) +void hlt_play_dead(void) { if (__this_cpu_read(cpu_info.x86) >= 4) wbinvd(); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 38ba6de56ede..a804b5ab32d0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -239,7 +239,7 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) return ns; } -static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +static void set_cyc2ns_scale(unsigned long khz, int cpu) { unsigned long long tsc_now, ns_now; struct cyc2ns_data *data; @@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) local_irq_save(flags); sched_clock_idle_sleep_event(); - if (!cpu_khz) + if (!khz) goto done; data = cyc2ns_write_begin(cpu); @@ -261,7 +261,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) * time function is continuous; see the comment near struct * cyc2ns_data. */ - clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz, + clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz, NSEC_PER_MSEC, 0); /* @@ -335,12 +335,6 @@ int check_tsc_unstable(void) } EXPORT_SYMBOL_GPL(check_tsc_unstable); -int check_tsc_disabled(void) -{ - return tsc_disabled; -} -EXPORT_SYMBOL_GPL(check_tsc_disabled); - #ifdef CONFIG_X86_TSC int __init notsc_setup(char *str) { @@ -665,19 +659,77 @@ success: } /** - * native_calibrate_tsc - calibrate the tsc on boot + * native_calibrate_tsc + * Determine TSC frequency via CPUID, else return 0. 
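The CPUID leaf 0x15 arithmetic that native_calibrate_tsc() relies on is simply TSC Hz = crystal Hz * EBX / EAX; a user-space sketch, assuming GCC's <cpuid.h>:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int denom, numer, crystal_hz, edx;

            if (!__get_cpuid_count(0x15, 0, &denom, &numer,
                                   &crystal_hz, &edx) || !denom || !numer)
                    return 1;   /* leaf 0x15 not usable on this CPU */

            if (crystal_hz)     /* ECX may be 0; the kernel then falls
                                   back to a per-model crystal table */
                    printf("TSC: %llu kHz\n", (unsigned long long)
                           crystal_hz / 1000 * numer / denom);
            else
                    printf("ratio only: %u/%u\n", numer, denom);
            return 0;
    }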
*/ unsigned long native_calibrate_tsc(void) +{ + unsigned int eax_denominator, ebx_numerator, ecx_hz, edx; + unsigned int crystal_khz; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + + if (boot_cpu_data.cpuid_level < 0x15) + return 0; + + eax_denominator = ebx_numerator = ecx_hz = edx = 0; + + /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */ + cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx); + + if (ebx_numerator == 0 || eax_denominator == 0) + return 0; + + crystal_khz = ecx_hz / 1000; + + if (crystal_khz == 0) { + switch (boot_cpu_data.x86_model) { + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + crystal_khz = 24000; /* 24.0 MHz */ + break; + case 0x5C: /* BXT */ + crystal_khz = 19200; /* 19.2 MHz */ + break; + } + } + + return crystal_khz * ebx_numerator / eax_denominator; +} + +static unsigned long cpu_khz_from_cpuid(void) +{ + unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + + if (boot_cpu_data.cpuid_level < 0x16) + return 0; + + eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0; + + cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx); + + return eax_base_mhz * 1000; +} + +/** + * native_calibrate_cpu - calibrate the cpu on boot + */ +unsigned long native_calibrate_cpu(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; unsigned long flags, latch, ms, fast_calibrate; int hpet = is_hpet_enabled(), i, loopmin; - /* Calibrate TSC using MSR for Intel Atom SoCs */ - local_irq_save(flags); - fast_calibrate = try_msr_calibrate_tsc(); - local_irq_restore(flags); + fast_calibrate = cpu_khz_from_cpuid(); + if (fast_calibrate) + return fast_calibrate; + + fast_calibrate = cpu_khz_from_msr(); if (fast_calibrate) return fast_calibrate; @@ -837,8 +889,12 @@ int recalibrate_cpu_khz(void) if (!boot_cpu_has(X86_FEATURE_TSC)) return -ENODEV; + cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); - cpu_khz = tsc_khz; + if (tsc_khz == 0) + tsc_khz = cpu_khz; + else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) + cpu_khz = tsc_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); @@ -1244,8 +1300,18 @@ void __init tsc_init(void) return; } + cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); - cpu_khz = tsc_khz; + + /* + * Trust non-zero tsc_khz as authorative, + * and use it to sanity check cpu_khz, + * which will be off if system timer is off. + */ + if (tsc_khz == 0) + tsc_khz = cpu_khz; + else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) + cpu_khz = tsc_khz; if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); @@ -1265,7 +1331,7 @@ void __init tsc_init(void) */ for_each_possible_cpu(cpu) { cyc2ns_init(cpu); - set_cyc2ns_scale(cpu_khz, cpu); + set_cyc2ns_scale(tsc_khz, cpu); } if (tsc_disabled > 0) diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 9911a0620f9a..0fe720d64fef 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -1,14 +1,5 @@ /* - * tsc_msr.c - MSR based TSC calibration on Intel Atom SoC platforms. - * - * TSC in Intel Atom SoC runs at a constant rate which can be figured - * by this formula: - * * - * See Intel 64 and IA-32 System Programming Guid section 16.12 and 30.11.5 - * for details. - * Especially some Intel Atom SoCs don't have PIT(i8254) or HPET, so MSR - * based calibration is the only option. 
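The cpu_khz/tsc_khz cross-check added to recalibrate_cpu_khz() and tsc_init() above accepts the CPU calibration only when the two values agree within 10%; the same test in isolation, with illustrative numbers:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            unsigned long cpu_khz = 3400000, tsc_khz = 2900000;

            if (tsc_khz == 0)
                    tsc_khz = cpu_khz;
            else if (labs((long)cpu_khz - (long)tsc_khz) * 10 > (long)tsc_khz)
                    cpu_khz = tsc_khz;  /* >10% apart: trust the TSC value */

            printf("cpu_khz=%lu tsc_khz=%lu\n", cpu_khz, tsc_khz);
            return 0;
    }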
- * + * tsc_msr.c - TSC frequency enumeration via MSR * * Copyright (C) 2013 Intel Corporation * Author: Bin Gao @@ -22,18 +13,10 @@ #include #include -/* CPU reference clock frequency: in KHz */ -#define FREQ_80 80000 -#define FREQ_83 83200 -#define FREQ_100 99840 -#define FREQ_133 133200 -#define FREQ_166 166400 - -#define MAX_NUM_FREQS 8 +#define MAX_NUM_FREQS 9 /* - * According to Intel 64 and IA-32 System Programming Guide, - * if MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be + * If MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be * read in MSR_PLATFORM_ID[12:8], otherwise in MSR_PERF_STAT[44:40]. * Unfortunately some Intel Atom SoCs aren't quite compliant to this, * so we need manually differentiate SoC families. This is what the @@ -48,17 +31,18 @@ struct freq_desc { static struct freq_desc freq_desc_tables[] = { /* PNW */ - { 6, 0x27, 0, { 0, 0, 0, 0, 0, FREQ_100, 0, FREQ_83 } }, + { 6, 0x27, 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 } }, /* CLV+ */ - { 6, 0x35, 0, { 0, FREQ_133, 0, 0, 0, FREQ_100, 0, FREQ_83 } }, - /* TNG */ - { 6, 0x4a, 1, { 0, FREQ_100, FREQ_133, 0, 0, 0, 0, 0 } }, - /* VLV2 */ - { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, - /* ANN */ - { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, - /* AIRMONT */ - { 6, 0x4c, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, FREQ_80, 0, 0, 0 } }, + { 6, 0x35, 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } }, + /* TNG - Intel Atom processor Z3400 series */ + { 6, 0x4a, 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 } }, + /* VLV2 - Intel Atom processor E3000, Z3600, Z3700 series */ + { 6, 0x37, 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 } }, + /* ANN - Intel Atom processor Z3500 series */ + { 6, 0x5a, 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } }, + /* AMT - Intel Atom processor X7-Z8000 and X5-Z8000 series */ + { 6, 0x4c, 1, { 83300, 100000, 133300, 116700, + 80000, 93300, 90000, 88900, 87500 } }, }; static int match_cpu(u8 family, u8 model) @@ -79,16 +63,20 @@ static int match_cpu(u8 family, u8 model) (freq_desc_tables[cpu_index].freqs[freq_id]) /* - * Do MSR calibration only for known/supported CPUs. + * MSR-based CPU/TSC frequency discovery for certain CPUs. * - * Returns the calibration value or 0 if MSR calibration failed. + * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy + * Return processor base frequency in KHz, or 0 on failure. 
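The arithmetic behind cpu_khz_from_msr() above is a plain product of the resolved bus clock and the bus ratio, plus the LAPIC cycles-per-jiffy value; a sketch with illustrative MSR-derived numbers:

    #include <stdio.h>

    int main(void)
    {
            unsigned int fsb_khz = 83300;  /* resolved MSR_FSB_FREQ entry   */
            unsigned int ratio   = 20;     /* max resolved bus ratio (MSR)  */
            unsigned int hz      = 250;    /* kernel HZ, for the LAPIC case */

            printf("tsc: %u kHz\n", fsb_khz * ratio);
            printf("lapic_timer_frequency: %u\n", fsb_khz * 1000 / hz);
            return 0;
    }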
*/ -unsigned long try_msr_calibrate_tsc(void) +unsigned long cpu_khz_from_msr(void) { u32 lo, hi, ratio, freq_id, freq; unsigned long res; int cpu_index; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model); if (cpu_index < 0) return 0; @@ -100,31 +88,17 @@ unsigned long try_msr_calibrate_tsc(void) rdmsr(MSR_IA32_PERF_STATUS, lo, hi); ratio = (hi >> 8) & 0x1f; } - pr_info("Maximum core-clock to bus-clock ratio: 0x%x\n", ratio); - - if (!ratio) - goto fail; /* Get FSB FREQ ID */ rdmsr(MSR_FSB_FREQ, lo, hi); freq_id = lo & 0x7; freq = id_to_freq(cpu_index, freq_id); - pr_info("Resolved frequency ID: %u, frequency: %u KHz\n", - freq_id, freq); - if (!freq) - goto fail; /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */ res = freq * ratio; - pr_info("TSC runs at %lu KHz\n", res); #ifdef CONFIG_X86_LOCAL_APIC lapic_timer_frequency = (freq * 1000) / HZ; - pr_info("lapic_timer_frequency = %d\n", lapic_timer_frequency); #endif return res; - -fail: - pr_warn("Fast TSC calibration using MSR failed\n"); - return 0; } diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 3dce1ca0a653..01f30e56f99e 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -440,10 +440,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs) static inline int is_revectored(int nr, struct revectored_struct *bitmap) { - __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" - :"=r" (nr) - :"m" (*bitmap), "r" (nr)); - return nr; + return test_bit(nr, bitmap->__map); } #define val_byte(val, n) (((__u8 *)&val)[n]) diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index cd05942bc918..f1aebfb49c36 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -44,6 +44,9 @@ EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(__sw_hweight32); +EXPORT_SYMBOL(__sw_hweight64); + /* * Export string functions. We normally rely on gcc builtin for most of these, * but gcc sometimes decides not to inline them. diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index dad5fe9633a3..58b459296e13 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -92,6 +92,7 @@ static void default_nmi_init(void) { }; static int default_i8042_detect(void) { return 1; }; struct x86_platform_ops x86_platform = { + .calibrate_cpu = native_calibrate_cpu, .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bbb5b283ff63..a397200281c1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1310,7 +1310,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) /* __delay is delay_tsc whenever the hardware has TSC, thus always. 
*/ if (guest_tsc < tsc_deadline) - __delay(tsc_deadline - guest_tsc); + __delay(min(tsc_deadline - guest_tsc, + nsec_to_cycles(vcpu, lapic_timer_advance_ns))); } static void start_apic_timer(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index c146f3c262c3..0149ac59c273 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -539,6 +539,7 @@ static void mtrr_lookup_var_start(struct mtrr_iter *iter) iter->fixed = false; iter->start_max = iter->start; + iter->range = NULL; iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node); __mtrr_lookup_var_next(iter); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 003618e324ce..7758680db20b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4979,6 +4979,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) if (vmx_xsaves_supported()) vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); + if (enable_pml) { + ASSERT(vmx->pml_pg); + vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); + vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); + } + return 0; } @@ -6671,7 +6677,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, /* Checks for #GP/#SS exceptions. */ exn = false; - if (is_protmode(vcpu)) { + if (is_long_mode(vcpu)) { + /* Long mode: #GP(0)/#SS(0) if the memory address is in a + * non-canonical form. This is the only check on the memory + * destination for long mode! + */ + exn = is_noncanonical_address(*ret); + } else if (is_protmode(vcpu)) { /* Protected mode: apply checks for segment validity in the * following order: * - segment type check (#GP(0) may be thrown) @@ -6688,17 +6700,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, * execute-only code segment */ exn = ((s.type & 0xa) == 8); - } - if (exn) { - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; - } - if (is_long_mode(vcpu)) { - /* Long mode: #GP(0)/#SS(0) if the memory address is in a - * non-canonical form. This is an only check for long mode. - */ - exn = is_noncanonical_address(*ret); - } else if (is_protmode(vcpu)) { + if (exn) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. */ exn = (s.unusable != 0); @@ -7938,22 +7943,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) *info2 = vmcs_read32(VM_EXIT_INTR_INFO); } -static int vmx_create_pml_buffer(struct vcpu_vmx *vmx) -{ - struct page *pml_pg; - - pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!pml_pg) - return -ENOMEM; - - vmx->pml_pg = pml_pg; - - vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); - - return 0; -} - static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) { if (vmx->pml_pg) { @@ -8225,6 +8214,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) if ((vectoring_info & VECTORING_INFO_VALID_MASK) && (exit_reason != EXIT_REASON_EXCEPTION_NMI && exit_reason != EXIT_REASON_EPT_VIOLATION && + exit_reason != EXIT_REASON_PML_FULL && exit_reason != EXIT_REASON_TASK_SWITCH)) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; @@ -8855,6 +8845,22 @@ static void vmx_load_vmcs01(struct kvm_vcpu *vcpu) put_cpu(); } +/* + * Ensure that the current vmcs of the logical processor is the + * vmcs01 of the vcpu before calling free_nested(). 
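The canonical-address test used by get_vmx_mem_address() above has a compact formulation for 48-bit virtual addresses: bits 63:47 must all equal bit 47. A sketch (relies on arithmetic right shift of signed values, as GCC and Clang provide):

    #include <stdio.h>

    static int is_noncanonical(unsigned long long addr)
    {
            return ((long long)(addr << 16) >> 16) != (long long)addr;
    }

    int main(void)
    {
            printf("%d\n", is_noncanonical(0x00007fffffffffffULL)); /* 0 */
            printf("%d\n", is_noncanonical(0x0000800000000000ULL)); /* 1 */
            printf("%d\n", is_noncanonical(0xffff800000000000ULL)); /* 0 */
            return 0;
    }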
+ */ +static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int r; + + r = vcpu_load(vcpu); + BUG_ON(r); + vmx_load_vmcs01(vcpu); + free_nested(vmx); + vcpu_put(vcpu); +} + static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -8863,8 +8869,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) vmx_destroy_pml_buffer(vmx); free_vpid(vmx->vpid); leave_guest_mode(vcpu); - vmx_load_vmcs01(vcpu); - free_nested(vmx); + vmx_free_vcpu_nested(vcpu); free_loaded_vmcs(vmx->loaded_vmcs); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu); @@ -8886,14 +8891,26 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (err) goto free_vcpu; + err = -ENOMEM; + + /* + * If PML is turned on, failure on enabling PML just results in failure + * of creating the vcpu, therefore we can simplify PML logic (by + * avoiding dealing with cases, such as enabling PML partially on vcpus + * for the guest, etc. + */ + if (enable_pml) { + vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!vmx->pml_pg) + goto uninit_vcpu; + } + vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) > PAGE_SIZE); - err = -ENOMEM; - if (!vmx->guest_msrs) { - goto uninit_vcpu; - } + if (!vmx->guest_msrs) + goto free_pml; vmx->loaded_vmcs = &vmx->vmcs01; vmx->loaded_vmcs->vmcs = alloc_vmcs(); @@ -8937,18 +8954,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->nested.current_vmptr = -1ull; vmx->nested.current_vmcs12 = NULL; - /* - * If PML is turned on, failure on enabling PML just results in failure - * of creating the vcpu, therefore we can simplify PML logic (by - * avoiding dealing with cases, such as enabling PML partially on vcpus - * for the guest, etc. 
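The reshuffled vmx_create_vcpu() error paths above follow the kernel's usual goto-unwind ladder, where each failure label releases exactly what was allocated before it; the pattern in miniature:

    #include <errno.h>
    #include <stdlib.h>

    static int create(void **pml_out, void **msrs_out)
    {
            void *pml, *msrs;

            pml = calloc(1, 4096);
            if (!pml)
                    goto out;
            msrs = malloc(4096);
            if (!msrs)
                    goto free_pml;

            *pml_out = pml;         /* success: caller owns both */
            *msrs_out = msrs;
            return 0;
    free_pml:
            free(pml);
    out:
            return -ENOMEM;
    }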
- */ - if (enable_pml) { - err = vmx_create_pml_buffer(vmx); - if (err) - goto free_vmcs; - } - return &vmx->vcpu; free_vmcs: @@ -8956,6 +8961,8 @@ free_vmcs: free_loaded_vmcs(vmx->loaded_vmcs); free_msrs: kfree(vmx->guest_msrs); +free_pml: + vmx_destroy_pml_buffer(vmx); uninit_vcpu: kvm_vcpu_uninit(&vmx->vcpu); free_vcpu: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 902d9da12392..b2766723c951 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -55,9 +55,6 @@ #include #include -#define CREATE_TRACE_POINTS -#include "trace.h" - #include #include #include @@ -68,6 +65,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include "trace.h" + #define MAX_IO_MSRS 256 #define KVM_MAX_MCE_BANKS 32 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) @@ -1244,12 +1244,6 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); static unsigned long max_tsc_khz; -static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) -{ - return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, - vcpu->arch.virtual_tsc_shift); -} - static u32 adjust_tsc_khz(u32 khz, s32 ppm) { u64 v = (u64)khz * (1000000 + ppm); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 7ce3634ab5fe..a82ca466b62e 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -2,6 +2,7 @@ #define ARCH_X86_KVM_X86_H #include +#include #include "kvm_cache_regs.h" #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL @@ -195,6 +196,12 @@ extern unsigned int lapic_timer_advance_ns; extern struct static_key kvm_no_apic_vcpu; +static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) +{ + return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, + vcpu->arch.virtual_tsc_shift); +} + /* Same "calling convention" as do_div: * - divide (n << 32) by base * - put result in n diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 3847e736702e..25da5bc8d83d 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1233,8 +1233,6 @@ static void write_bar_via_cfg(u32 cfg_offset, u32 off, u32 val) static void probe_pci_console(void) { u8 cap, common_cap = 0, device_cap = 0; - /* Offset within BAR0 */ - u32 device_offset; u32 device_len; /* Avoid recursive printk into here. */ @@ -1258,24 +1256,16 @@ static void probe_pci_console(void) u8 vndr = read_pci_config_byte(0, 1, 0, cap); if (vndr == PCI_CAP_ID_VNDR) { u8 type, bar; - u32 offset, length; type = read_pci_config_byte(0, 1, 0, cap + offsetof(struct virtio_pci_cap, cfg_type)); bar = read_pci_config_byte(0, 1, 0, cap + offsetof(struct virtio_pci_cap, bar)); - offset = read_pci_config(0, 1, 0, - cap + offsetof(struct virtio_pci_cap, offset)); - length = read_pci_config(0, 1, 0, - cap + offsetof(struct virtio_pci_cap, length)); switch (type) { case VIRTIO_PCI_CAP_DEVICE_CFG: - if (bar == 0) { + if (bar == 0) device_cap = cap; - device_offset = offset; - device_len = length; - } break; case VIRTIO_PCI_CAP_PCI_CFG: console_access_cap = cap; @@ -1297,13 +1287,16 @@ static void probe_pci_console(void) * emerg_wr. If it doesn't support VIRTIO_CONSOLE_F_EMERG_WRITE * it should ignore the access. 
*/ + device_len = read_pci_config(0, 1, 0, + device_cap + offsetof(struct virtio_pci_cap, length)); if (device_len < (offsetof(struct virtio_console_config, emerg_wr) + sizeof(u32))) { printk(KERN_ERR "lguest: console missing emerg_wr field\n"); return; } - console_cfg_offset = device_offset; + console_cfg_offset = read_pci_config(0, 1, 0, + device_cap + offsetof(struct virtio_pci_cap, offset)); printk(KERN_INFO "lguest: Console via virtio-pci emerg_wr\n"); } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 72a576752a7e..34a74131a12c 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -24,8 +24,9 @@ lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o +lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o -obj-y += msr.o msr-reg.o msr-reg-export.o +obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 2b0ef26da0bd..bf603ebbfd8e 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -17,11 +17,11 @@ /* Standard copy_to_user with segment limit checking */ ENTRY(_copy_to_user) - GET_THREAD_INFO(%rax) + mov PER_CPU_VAR(current_task), %rax movq %rdi,%rcx addq %rdx,%rcx jc bad_to_user - cmpq TI_addr_limit(%rax),%rcx + cmpq TASK_addr_limit(%rax),%rcx ja bad_to_user ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \ "jmp copy_user_generic_string", \ @@ -32,11 +32,11 @@ ENDPROC(_copy_to_user) /* Standard copy_from_user with segment limit checking */ ENTRY(_copy_from_user) - GET_THREAD_INFO(%rax) + mov PER_CPU_VAR(current_task), %rax movq %rsi,%rcx addq %rdx,%rcx jc bad_from_user - cmpq TI_addr_limit(%rax),%rcx + cmpq TASK_addr_limit(%rax),%rcx ja bad_from_user ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \ "jmp copy_user_generic_string", \ diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index 28a6654f0d08..b6fcb9a9ddbc 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@ -6,6 +6,7 @@ */ #include #include +#include #include /** diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 46668cda4ffd..0ef5128c2de8 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -35,8 +35,8 @@ .text ENTRY(__get_user_1) - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC 1: movzbl (%_ASM_AX),%edx @@ -48,8 +48,8 @@ ENDPROC(__get_user_1) ENTRY(__get_user_2) add $1,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC 2: movzwl -1(%_ASM_AX),%edx @@ -61,8 +61,8 @@ ENDPROC(__get_user_2) ENTRY(__get_user_4) add $3,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC 3: movl -3(%_ASM_AX),%edx @@ -75,8 +75,8 @@ ENTRY(__get_user_8) #ifdef CONFIG_X86_64 add $7,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC 4: movq -7(%_ASM_AX),%rdx @@ -86,8 +86,8 @@ ENTRY(__get_user_8) #else add 
$7,%_ASM_AX jc bad_get_user_8 - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user_8 ASM_STAC 4: movl -7(%_ASM_AX),%edx diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S new file mode 100644 index 000000000000..02de3d74d2c5 --- /dev/null +++ b/arch/x86/lib/hweight.S @@ -0,0 +1,77 @@ +#include + +#include + +/* + * unsigned int __sw_hweight32(unsigned int w) + * %rdi: w + */ +ENTRY(__sw_hweight32) + +#ifdef CONFIG_X86_64 + movl %edi, %eax # w +#endif + __ASM_SIZE(push,) %__ASM_REG(dx) + movl %eax, %edx # w -> t + shrl %edx # t >>= 1 + andl $0x55555555, %edx # t &= 0x55555555 + subl %edx, %eax # w -= t + + movl %eax, %edx # w -> t + shrl $2, %eax # w_tmp >>= 2 + andl $0x33333333, %edx # t &= 0x33333333 + andl $0x33333333, %eax # w_tmp &= 0x33333333 + addl %edx, %eax # w = w_tmp + t + + movl %eax, %edx # w -> t + shrl $4, %edx # t >>= 4 + addl %edx, %eax # w_tmp += t + andl $0x0f0f0f0f, %eax # w_tmp &= 0x0f0f0f0f + imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101 + shrl $24, %eax # w = w_tmp >> 24 + __ASM_SIZE(pop,) %__ASM_REG(dx) + ret +ENDPROC(__sw_hweight32) + +ENTRY(__sw_hweight64) +#ifdef CONFIG_X86_64 + pushq %rdx + + movq %rdi, %rdx # w -> t + movabsq $0x5555555555555555, %rax + shrq %rdx # t >>= 1 + andq %rdx, %rax # t &= 0x5555555555555555 + movabsq $0x3333333333333333, %rdx + subq %rax, %rdi # w -= t + + movq %rdi, %rax # w -> t + shrq $2, %rdi # w_tmp >>= 2 + andq %rdx, %rax # t &= 0x3333333333333333 + andq %rdi, %rdx # w_tmp &= 0x3333333333333333 + addq %rdx, %rax # w = w_tmp + t + + movq %rax, %rdx # w -> t + shrq $4, %rdx # t >>= 4 + addq %rdx, %rax # w_tmp += t + movabsq $0x0f0f0f0f0f0f0f0f, %rdx + andq %rdx, %rax # w_tmp &= 0x0f0f0f0f0f0f0f0f + movabsq $0x0101010101010101, %rdx + imulq %rdx, %rax # w_tmp *= 0x0101010101010101 + shrq $56, %rax # w = w_tmp >> 56 + + popq %rdx + ret +#else /* CONFIG_X86_32 */ + /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */ + pushl %ecx + + call __sw_hweight32 + movl %eax, %ecx # stash away result + movl %edx, %eax # second part of input + call __sw_hweight32 + addl %ecx, %eax # result + + popl %ecx + ret +#endif +ENDPROC(__sw_hweight64) diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 1a416935bac9..1088eb8f3a5f 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -155,14 +155,24 @@ found: /* * In 32-bits mode, if the [7:6] bits (mod bits of * ModRM) on the second byte are not 11b, it is - * LDS or LES. + * LDS or LES or BOUND. 
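The new hweight.S above implements the classic bit-sliced population count; the same algorithm reads more clearly in C (this mirrors the 32-bit assembly step by step):

    #include <stdio.h>

    static unsigned int sw_hweight32(unsigned int w)
    {
            w -= (w >> 1) & 0x55555555;                      /* 2-bit sums */
            w  = (w & 0x33333333) + ((w >> 2) & 0x33333333); /* 4-bit sums */
            w  = (w + (w >> 4)) & 0x0f0f0f0f;                /* 8-bit sums */
            return (w * 0x01010101) >> 24;                   /* fold bytes */
    }

    int main(void)
    {
            printf("%u\n", sw_hweight32(0xdeadbeef));        /* prints 24 */
            return 0;
    }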
*/ if (X86_MODRM_MOD(b2) != 3) goto vex_end; } insn->vex_prefix.bytes[0] = b; insn->vex_prefix.bytes[1] = b2; - if (inat_is_vex3_prefix(attr)) { + if (inat_is_evex_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + b2 = peek_nbyte_next(insn_byte_t, insn, 3); + insn->vex_prefix.bytes[3] = b2; + insn->vex_prefix.nbytes = 4; + insn->next_byte += 4; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else if (inat_is_vex3_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); insn->vex_prefix.bytes[2] = b2; insn->vex_prefix.nbytes = 3; @@ -221,7 +231,9 @@ void insn_get_opcode(struct insn *insn) m = insn_vex_m_bits(insn); p = insn_vex_p_bits(insn); insn->attr = inat_get_avx_attribute(op, m, p); - if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) + if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || + (!inat_accept_vex(insn->attr) && + !inat_is_group(insn->attr))) insn->attr = 0; /* This instruction is bad */ goto end; /* VEX has only 1 byte for opcode */ } diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c new file mode 100644 index 000000000000..f7dfeda83e5c --- /dev/null +++ b/arch/x86/lib/kaslr.c @@ -0,0 +1,90 @@ +/* + * Entropy functions used on early boot for KASLR base and memory + * randomization. The base randomization is done in the compressed + * kernel and memory randomization is done early when the regular + * kernel starts. This file is included in the compressed kernel and + * normally linked in the regular. + */ +#include +#include +#include +#include +#include + +/* + * When built for the regular kernel, several functions need to be stubbed out + * or changed to their regular kernel equivalent. + */ +#ifndef KASLR_COMPRESSED_BOOT +#include +#include + +#define debug_putstr(v) early_printk(v) +#define has_cpuflag(f) boot_cpu_has(f) +#define get_boot_seed() kaslr_offset() +#endif + +#define I8254_PORT_CONTROL 0x43 +#define I8254_PORT_COUNTER0 0x40 +#define I8254_CMD_READBACK 0xC0 +#define I8254_SELECT_COUNTER0 0x02 +#define I8254_STATUS_NOTREADY 0x40 +static inline u16 i8254(void) +{ + u16 status, timer; + + do { + outb(I8254_PORT_CONTROL, + I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + status = inb(I8254_PORT_COUNTER0); + timer = inb(I8254_PORT_COUNTER0); + timer |= inb(I8254_PORT_COUNTER0) << 8; + } while (status & I8254_STATUS_NOTREADY); + + return timer; +} + +unsigned long kaslr_get_random_long(const char *purpose) +{ +#ifdef CONFIG_X86_64 + const unsigned long mix_const = 0x5d6008cbf3848dd3UL; +#else + const unsigned long mix_const = 0x3f39e593UL; +#endif + unsigned long raw, random = get_boot_seed(); + bool use_i8254 = true; + + debug_putstr(purpose); + debug_putstr(" KASLR using"); + + if (has_cpuflag(X86_FEATURE_RDRAND)) { + debug_putstr(" RDRAND"); + if (rdrand_long(&raw)) { + random ^= raw; + use_i8254 = false; + } + } + + if (has_cpuflag(X86_FEATURE_TSC)) { + debug_putstr(" RDTSC"); + raw = rdtsc(); + + random ^= raw; + use_i8254 = false; + } + + if (use_i8254) { + debug_putstr(" i8254"); + random ^= i8254(); + } + + /* Circular multiply for better bit diffusion */ + asm("mul %3" + : "=a" (random), "=d" (raw) + : "a" (random), "rm" (mix_const)); + random += raw; + + debug_putstr("...\n"); + + return random; +} diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index e0817a12d323..c891ece81e5b 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -29,14 +29,14 @@ * as they get called from within inline assembly. 
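The final mixing step in kaslr_get_random_long() above is a widening multiply by an odd constant with the high half folded back in, which diffuses entropy across all bits. A sketch of the 64-bit case, assuming GCC's __int128 extension (the constant is the mix_const from the hunk above):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t mix(uint64_t random)
    {
            const uint64_t mix_const = 0x5d6008cbf3848dd3ULL;
            unsigned __int128 prod = (unsigned __int128)random * mix_const;

            /* low half of the product, plus the high half */
            return (uint64_t)prod + (uint64_t)(prod >> 64);
    }

    int main(void)
    {
            printf("%llx\n", (unsigned long long)mix(0x12345678));
            return 0;
    }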
*/ -#define ENTER GET_THREAD_INFO(%_ASM_BX) +#define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX #define EXIT ASM_CLAC ; \ ret .text ENTRY(__put_user_1) ENTER - cmp TI_addr_limit(%_ASM_BX),%_ASM_CX + cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX jae bad_put_user ASM_STAC 1: movb %al,(%_ASM_CX) @@ -46,7 +46,7 @@ ENDPROC(__put_user_1) ENTRY(__put_user_2) ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX + mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $1,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user @@ -58,7 +58,7 @@ ENDPROC(__put_user_2) ENTRY(__put_user_4) ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX + mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $3,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user @@ -70,7 +70,7 @@ ENDPROC(__put_user_4) ENTRY(__put_user_8) ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX + mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $7,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 0a42327a59d7..9f760cdcaf40 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -6,7 +6,7 @@ * Copyright 2002 Andi Kleen */ #include -#include +#include /* * Zero Userspace diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index d388de72eaca..ec378cd7b71e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -13,12 +13,17 @@ # opcode: escape # escaped-name # EndTable # +# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix +# mnemonics that begin with lowercase 'k' accept a VEX prefix +# # # GrpTable: GrpXXX # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] # EndTable # # AVX Superscripts +# (ev): this opcode requires EVEX prefix. +# (evo): this opcode is changed by EVEX prefix (EVEX opcode) # (v): this opcode requires VEX prefix. # (v1): this opcode only supports 128bit VEX. 
# @@ -137,7 +142,7 @@ AVXcode: # 0x60 - 0x6f 60: PUSHA/PUSHAD (i64) 61: POPA/POPAD (i64) -62: BOUND Gv,Ma (i64) +62: BOUND Gv,Ma (i64) | EVEX (Prefix) 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) 64: SEG=FS (Prefix) 65: SEG=GS (Prefix) @@ -399,17 +404,17 @@ AVXcode: 1 3f: # 0x0f 0x40-0x4f 40: CMOVO Gv,Ev -41: CMOVNO Gv,Ev -42: CMOVB/C/NAE Gv,Ev +41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66) +42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66) 43: CMOVAE/NB/NC Gv,Ev -44: CMOVE/Z Gv,Ev -45: CMOVNE/NZ Gv,Ev -46: CMOVBE/NA Gv,Ev -47: CMOVA/NBE Gv,Ev +44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66) +45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66) +46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66) +47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66) 48: CMOVS Gv,Ev 49: CMOVNS Gv,Ev -4a: CMOVP/PE Gv,Ev -4b: CMOVNP/PO Gv,Ev +4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66) +4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk 4c: CMOVL/NGE Gv,Ev 4d: CMOVNL/GE Gv,Ev 4e: CMOVLE/NG Gv,Ev @@ -426,7 +431,7 @@ AVXcode: 1 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) 59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) -5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) @@ -447,7 +452,7 @@ AVXcode: 1 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) -6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev) # 0x0f 0x70-0x7f 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) 71: Grp12 (1A) @@ -458,14 +463,14 @@ AVXcode: 1 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) # Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. 
77: emms | vzeroupper | vzeroall -78: VMREAD Ey,Gy -79: VMWRITE Gy,Ey -7a: -7b: +78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev) +79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev) +7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev) +7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev) 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) -7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 80: JO Jz (f64) @@ -485,16 +490,16 @@ AVXcode: 1 8e: JLE/JNG Jz (f64) 8f: JNLE/JG Jz (f64) # 0x0f 0x90-0x9f -90: SETO Eb -91: SETNO Eb -92: SETB/C/NAE Eb -93: SETAE/NB/NC Eb +90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) +91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) +92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2) +93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2) 94: SETE/Z Eb 95: SETNE/NZ Eb 96: SETBE/NA Eb 97: SETA/NBE Eb -98: SETS Eb -99: SETNS Eb +98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66) +99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66) 9a: SETP/PE Eb 9b: SETNP/PO Eb 9c: SETL/NGE Eb @@ -564,11 +569,11 @@ d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) -db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo) dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) -df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo) # 0x0f 0xe0-0xef e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) @@ -576,16 +581,16 @@ e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) -e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2) e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) -eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo) ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) -ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo) # 0x0f 0xf0-0xff f0: vlddqu Vx,Mx (F2) f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) @@ -626,81 +631,105 @@ AVXcode: 2 0e: vtestps Vx,Wx (66),(v) 0f: vtestpd Vx,Wx (66),(v) # 0x0f 0x38 
0x10-0x1f -10: pblendvb Vdq,Wdq (66) -11: -12: -13: vcvtph2ps Vx,Wx,Ib (66),(v) -14: blendvps Vdq,Wdq (66) -15: blendvpd Vdq,Wdq (66) -16: vpermps Vqq,Hqq,Wqq (66),(v) +10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev) +11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev) +12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev) +13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev) +14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo) +15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo) +16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo) 17: vptest Vx,Wx (66) 18: vbroadcastss Vx,Wd (66),(v) -19: vbroadcastsd Vqq,Wq (66),(v) -1a: vbroadcastf128 Vqq,Mdq (66),(v) -1b: +19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo) +1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo) +1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev) 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) -1f: +1f: vpabsq Vx,Wx (66),(ev) # 0x0f 0x38 0x20-0x2f -20: vpmovsxbw Vx,Ux/Mq (66),(v1) -21: vpmovsxbd Vx,Ux/Md (66),(v1) -22: vpmovsxbq Vx,Ux/Mw (66),(v1) -23: vpmovsxwd Vx,Ux/Mq (66),(v1) -24: vpmovsxwq Vx,Ux/Md (66),(v1) -25: vpmovsxdq Vx,Ux/Mq (66),(v1) -26: -27: -28: vpmuldq Vx,Hx,Wx (66),(v1) -29: vpcmpeqq Vx,Hx,Wx (66),(v1) -2a: vmovntdqa Vx,Mx (66),(v1) +20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev) +21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev) +24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev) +25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev) +26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev) +27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev) +28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev) +2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev) 2b: vpackusdw Vx,Hx,Wx (66),(v1) -2c: vmaskmovps Vx,Hx,Mx (66),(v) -2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo) 2e: vmaskmovps Mx,Hx,Vx (66),(v) 2f: vmaskmovpd Mx,Hx,Vx (66),(v) # 0x0f 0x38 0x30-0x3f -30: vpmovzxbw Vx,Ux/Mq (66),(v1) -31: vpmovzxbd Vx,Ux/Md (66),(v1) -32: vpmovzxbq Vx,Ux/Mw (66),(v1) -33: vpmovzxwd Vx,Ux/Mq (66),(v1) -34: vpmovzxwq Vx,Ux/Md (66),(v1) -35: vpmovzxdq Vx,Ux/Mq (66),(v1) -36: vpermd Vqq,Hqq,Wqq (66),(v) +30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev) +31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev) +32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev) +34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev) +36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo) 37: vpcmpgtq Vx,Hx,Wx (66),(v1) -38: vpminsb Vx,Hx,Wx (66),(v1) -39: vpminsd Vx,Hx,Wx (66),(v1) -3a: vpminuw Vx,Hx,Wx (66),(v1) -3b: vpminud Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev) +39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev) +3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev) +3b: 
vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo) 3c: vpmaxsb Vx,Hx,Wx (66),(v1) -3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo) 3e: vpmaxuw Vx,Hx,Wx (66),(v1) -3f: vpmaxud Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo) # 0x0f 0x38 0x40-0x8f -40: vpmulld Vx,Hx,Wx (66),(v1) +40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo) 41: vphminposuw Vdq,Wdq (66),(v1) -42: -43: -44: +42: vgetexpps/d Vx,Wx (66),(ev) +43: vgetexpss/d Vx,Hx,Wx (66),(ev) +44: vplzcntd/q Vx,Wx (66),(ev) 45: vpsrlvd/q Vx,Hx,Wx (66),(v) -46: vpsravd Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 47: vpsllvd/q Vx,Hx,Wx (66),(v) -# Skip 0x48-0x57 +# Skip 0x48-0x4b +4c: vrcp14ps/d Vpd,Wpd (66),(ev) +4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) +4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) +4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev) +# Skip 0x50-0x57 58: vpbroadcastd Vx,Wx (66),(v) -59: vpbroadcastq Vx,Wx (66),(v) -5a: vbroadcasti128 Vqq,Mdq (66),(v) -# Skip 0x5b-0x77 +59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) +5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) +5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) +# Skip 0x5c-0x63 +64: vpblendmd/q Vx,Hx,Wx (66),(ev) +65: vblendmps/d Vx,Hx,Wx (66),(ev) +66: vpblendmb/w Vx,Hx,Wx (66),(ev) +# Skip 0x67-0x74 +75: vpermi2b/w Vx,Hx,Wx (66),(ev) +76: vpermi2d/q Vx,Hx,Wx (66),(ev) +77: vpermi2ps/d Vx,Hx,Wx (66),(ev) 78: vpbroadcastb Vx,Wx (66),(v) 79: vpbroadcastw Vx,Wx (66),(v) -# Skip 0x7a-0x7f +7a: vpbroadcastb Vx,Rv (66),(ev) +7b: vpbroadcastw Vx,Rv (66),(ev) +7c: vpbroadcastd/q Vx,Rv (66),(ev) +7d: vpermt2b/w Vx,Hx,Wx (66),(ev) +7e: vpermt2d/q Vx,Hx,Wx (66),(ev) +7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 80: INVEPT Gy,Mdq (66) 81: INVPID Gy,Mdq (66) 82: INVPCID Gy,Mdq (66) +83: vpmultishiftqb Vx,Hx,Wx (66),(ev) +88: vexpandps/d Vpd,Wpd (66),(ev) +89: vpexpandd/q Vx,Wx (66),(ev) +8a: vcompressps/d Wx,Vx (66),(ev) +8b: vpcompressd/q Wx,Vx (66),(ev) 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8d: vpermb/w Vx,Hx,Wx (66),(ev) 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) # 0x0f 0x38 0x90-0xbf (FMA) -90: vgatherdd/q Vx,Hx,Wx (66),(v) -91: vgatherqd/q Vx,Hx,Wx (66),(v) +90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo) +91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo) 92: vgatherdps/d Vx,Hx,Wx (66),(v) 93: vgatherqps/d Vx,Hx,Wx (66),(v) 94: @@ -715,6 +744,10 @@ AVXcode: 2 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a0: vpscatterdd/q Wx,Vx (66),(ev) +a1: vpscatterqd/q Wx,Vx (66),(ev) +a2: vscatterdps/d Wx,Vx (66),(ev) +a3: vscatterqps/d Wx,Vx (66),(ev) a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) @@ -725,6 +758,8 @@ ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b4: vpmadd52luq Vx,Hx,Wx (66),(ev) +b5: vpmadd52huq Vx,Hx,Wx (66),(ev) b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) @@ -736,12 +771,15 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) # 0x0f 0x38 0xc0-0xff -c8: sha1nexte Vdq,Wdq +c4: vpconflictd/q Vx,Wx (66),(ev) +c6: Grp18 (1A) +c7: Grp19 (1A) +c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev) c9: sha1msg1 Vdq,Wdq -ca: 
sha1msg2 Vdq,Wdq -cb: sha256rnds2 Vdq,Wdq -cc: sha256msg1 Vdq,Wdq -cd: sha256msg2 Vdq,Wdq +ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev) +cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) +cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) +cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) db: VAESIMC Vdq,Wdq (66),(v1) dc: VAESENC Vdq,Hdq,Wdq (66),(v1) dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) @@ -763,15 +801,15 @@ AVXcode: 3 00: vpermq Vqq,Wqq,Ib (66),(v) 01: vpermpd Vqq,Wqq,Ib (66),(v) 02: vpblendd Vx,Hx,Wx,Ib (66),(v) -03: +03: valignd/q Vx,Hx,Wx,Ib (66),(ev) 04: vpermilps Vx,Wx,Ib (66),(v) 05: vpermilpd Vx,Wx,Ib (66),(v) 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 07: -08: vroundps Vx,Wx,Ib (66) -09: vroundpd Vx,Wx,Ib (66) -0a: vroundss Vss,Wss,Ib (66),(v1) -0b: vroundsd Vsd,Wsd,Ib (66),(v1) +08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) +0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 0c: vblendps Vx,Hx,Wx,Ib (66) 0d: vblendpd Vx,Hx,Wx,Ib (66) 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) @@ -780,26 +818,51 @@ AVXcode: 3 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) 16: vpextrd/q Ey,Vdq,Ib (66),(v1) 17: vextractps Ed,Vdq,Ib (66),(v1) -18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) -19: vextractf128 Wdq,Vqq,Ib (66),(v) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) +19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo) +1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) +1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev) 1d: vcvtps2ph Wx,Vx,Ib (66),(v) +1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev) +1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev) 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) -38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) -39: vextracti128 Wdq,Vqq,Ib (66),(v) +23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) +25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) +26: vgetmantps/d Vx,Wx,Ib (66),(ev) +27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +30: kshiftrb/w Vk,Uk,Ib (66),(v) +31: kshiftrd/q Vk,Uk,Ib (66),(v) +32: kshiftlb/w Vk,Uk,Ib (66),(v) +33: kshiftld/q Vk,Uk,Ib (66),(v) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) +39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo) +3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) +3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev) +3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev) +3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev) 40: vdpps Vx,Hx,Wx,Ib (66) 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) -42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo) +43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) 4a: vblendvps Vx,Hx,Wx,Lx (66),(v) 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev) +51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) +54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) +55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) +56: vreduceps/d Vx,Wx,Ib (66),(ev) +57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +66: vfpclassps/d Vk,Wx,Ib (66),(ev) +67: vfpclassss/d Vk,Wx,Ib (66),(ev) cc: sha1rnds4 Vdq,Wdq,Ib df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) f0: RORX Gy,Ey,Ib (F2),(v) @@ -927,8 +990,10 @@ 
GrpTable: Grp12 EndTable GrpTable: Grp13 +0: vprord/q Hx,Wx,Ib (66),(ev) +1: vprold/q Hx,Wx,Ib (66),(ev) 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) -4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo) 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) EndTable @@ -963,6 +1028,20 @@ GrpTable: Grp17 3: BLSI By,Ey (v) EndTable +GrpTable: Grp18 +1: vgatherpf0dps/d Wx (66),(ev) +2: vgatherpf1dps/d Wx (66),(ev) +5: vscatterpf0dps/d Wx (66),(ev) +6: vscatterpf1dps/d Wx (66),(ev) +EndTable + +GrpTable: Grp19 +1: vgatherpf0qps/d Wx (66),(ev) +2: vgatherpf1qps/d Wx (66),(ev) +5: vscatterpf0qps/d Wx (66),(ev) +6: vscatterpf1qps/d Wx (66),(ev) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 62c0043a5fd5..96d2b847e09e 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -37,4 +37,5 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o obj-$(CONFIG_X86_INTEL_MPX) += mpx.o obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o +obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 99bfb192803f..9a17250bcbe0 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -72,9 +72,9 @@ static struct addr_marker address_markers[] = { { 0, "User Space" }, #ifdef CONFIG_X86_64 { 0x8000000000000000UL, "Kernel Space" }, - { PAGE_OFFSET, "Low Kernel Mapping" }, - { VMALLOC_START, "vmalloc() Area" }, - { VMEMMAP_START, "Vmemmap" }, + { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, + { 0/* VMALLOC_START */, "vmalloc() Area" }, + { 0/* VMEMMAP_START */, "Vmemmap" }, # ifdef CONFIG_X86_ESPFIX64 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, # endif @@ -434,8 +434,16 @@ void ptdump_walk_pgd_level_checkwx(void) static int __init pt_dump_init(void) { + /* + * Various markers are not compile-time constants, so assign them + * here. + */ +#ifdef CONFIG_X86_64 + address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET; + address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; + address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; +#endif #ifdef CONFIG_X86_32 - /* Not a compile-time constant on x86-32 */ address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; # ifdef CONFIG_HIGHMEM diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 4bb53b89f3c5..832b98f822be 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,6 +1,7 @@ #include #include #include +#include typedef bool (*ex_handler_t)(const struct exception_table_entry *, struct pt_regs *, int); @@ -37,7 +38,7 @@ bool ex_handler_ext(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { /* Special hack for uaccess_err */ - current_thread_info()->uaccess_err = 1; + current->thread.uaccess_err = 1; regs->ip = ex_fixup_addr(fixup); return true; } @@ -46,8 +47,9 @@ EXPORT_SYMBOL(ex_handler_ext); bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { - WARN_ONCE(1, "unchecked MSR access error: RDMSR from 0x%x\n", - (unsigned int)regs->cx); + if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n", + (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) + show_stack_regs(regs); /* Pretend that the read succeeded and returned 0. 
*/ regs->ip = ex_fixup_addr(fixup); @@ -60,9 +62,10 @@ EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { - WARN_ONCE(1, "unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n", - (unsigned int)regs->cx, - (unsigned int)regs->dx, (unsigned int)regs->ax); + if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n", + (unsigned int)regs->cx, (unsigned int)regs->dx, + (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) + show_stack_regs(regs); /* Pretend that the write succeeded. */ regs->ip = ex_fixup_addr(fixup); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 7d1fa7cd2374..dc8023060456 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -439,7 +439,7 @@ static noinline int vmalloc_fault(unsigned long address) * happen within a race in page table update. In the later * case just flush: */ - pgd = pgd_offset(current->active_mm, address); + pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address); pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; @@ -737,7 +737,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, * In this case we need to make sure we're not recursively * faulting through the emulate_vsyscall() logic. */ - if (current_thread_info()->sig_on_uaccess_error && signal) { + if (current->thread.sig_on_uaccess_err && signal) { tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code | PF_USER; tsk->thread.cr2 = address; @@ -1353,7 +1353,7 @@ good_area: * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); major |= fault & VM_FAULT_MAJOR; /* diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 372aad2b3291..cc82830bc8c4 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -17,6 +17,7 @@ #include #include /* for MAX_DMA_PFN */ #include +#include /* * We need to define the tracepoints somewhere, and tlb.c @@ -590,6 +591,9 @@ void __init init_mem_mapping(void) /* the ISA range is always mapped regardless of memory holes */ init_memory_mapping(0, ISA_END_ADDRESS); + /* Init the trampoline, possibly with KASLR memory offset */ + init_trampoline(); + /* * If the allocation is in bottom-up direction, we setup direct mapping * in bottom-up, otherwise we setup direct mapping in top-down. diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bce2e5d9edd4..53cc2256cf23 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -328,22 +328,30 @@ void __init cleanup_highmap(void) } } +/* + * Create PTE level page table mapping for physical addresses. + * It returns the last physical address mapped. 
+ */ static unsigned long __meminit -phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, +phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, pgprot_t prot) { - unsigned long pages = 0, next; - unsigned long last_map_addr = end; + unsigned long pages = 0, paddr_next; + unsigned long paddr_last = paddr_end; + pte_t *pte; int i; - pte_t *pte = pte_page + pte_index(addr); + pte = pte_page + pte_index(paddr); + i = pte_index(paddr); - for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) { - next = (addr & PAGE_MASK) + PAGE_SIZE; - if (addr >= end) { + for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) { + paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE; + if (paddr >= paddr_end) { if (!after_bootmem && - !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) && - !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN)) + !e820_any_mapped(paddr & PAGE_MASK, paddr_next, + E820_RAM) && + !e820_any_mapped(paddr & PAGE_MASK, paddr_next, + E820_RESERVED_KERN)) set_pte(pte, __pte(0)); continue; } @@ -354,54 +362,61 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, * pagetable pages as RO. So assume someone who pre-setup * these mappings are more intelligent. */ - if (pte_val(*pte)) { + if (!pte_none(*pte)) { if (!after_bootmem) pages++; continue; } if (0) - printk(" pte=%p addr=%lx pte=%016lx\n", - pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); + pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr, + pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte); pages++; - set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot)); - last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); + paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE; } update_page_count(PG_LEVEL_4K, pages); - return last_map_addr; + return paddr_last; } +/* + * Create PMD level page table mapping for physical addresses. The virtual + * and physical address have to be aligned at this level. + * It returns the last physical address mapped. 
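+ *
+ * (For scale: with 4KB pages each PMD entry maps PMD_SIZE = 2MB and a
+ * PMD page holds 512 entries, so a single call can populate up to 1GB
+ * of the direct mapping.)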
+ */ static unsigned long __meminit -phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, +phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, unsigned long page_size_mask, pgprot_t prot) { - unsigned long pages = 0, next; - unsigned long last_map_addr = end; + unsigned long pages = 0, paddr_next; + unsigned long paddr_last = paddr_end; - int i = pmd_index(address); + int i = pmd_index(paddr); - for (; i < PTRS_PER_PMD; i++, address = next) { - pmd_t *pmd = pmd_page + pmd_index(address); + for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) { + pmd_t *pmd = pmd_page + pmd_index(paddr); pte_t *pte; pgprot_t new_prot = prot; - next = (address & PMD_MASK) + PMD_SIZE; - if (address >= end) { + paddr_next = (paddr & PMD_MASK) + PMD_SIZE; + if (paddr >= paddr_end) { if (!after_bootmem && - !e820_any_mapped(address & PMD_MASK, next, E820_RAM) && - !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN)) + !e820_any_mapped(paddr & PMD_MASK, paddr_next, + E820_RAM) && + !e820_any_mapped(paddr & PMD_MASK, paddr_next, + E820_RESERVED_KERN)) set_pmd(pmd, __pmd(0)); continue; } - if (pmd_val(*pmd)) { + if (!pmd_none(*pmd)) { if (!pmd_large(*pmd)) { spin_lock(&init_mm.page_table_lock); pte = (pte_t *)pmd_page_vaddr(*pmd); - last_map_addr = phys_pte_init(pte, address, - end, prot); + paddr_last = phys_pte_init(pte, paddr, + paddr_end, prot); spin_unlock(&init_mm.page_table_lock); continue; } @@ -420,7 +435,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, if (page_size_mask & (1 << PG_LEVEL_2M)) { if (!after_bootmem) pages++; - last_map_addr = next; + paddr_last = paddr_next; continue; } new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); @@ -430,51 +445,65 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, pages++; spin_lock(&init_mm.page_table_lock); set_pte((pte_t *)pmd, - pfn_pte((address & PMD_MASK) >> PAGE_SHIFT, + pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT, __pgprot(pgprot_val(prot) | _PAGE_PSE))); spin_unlock(&init_mm.page_table_lock); - last_map_addr = next; + paddr_last = paddr_next; continue; } pte = alloc_low_page(); - last_map_addr = phys_pte_init(pte, address, end, new_prot); + paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot); spin_lock(&init_mm.page_table_lock); pmd_populate_kernel(&init_mm, pmd, pte); spin_unlock(&init_mm.page_table_lock); } update_page_count(PG_LEVEL_2M, pages); - return last_map_addr; + return paddr_last; } +/* + * Create PUD level page table mapping for physical addresses. The virtual + * and physical address do not have to be aligned at this level. KASLR can + * randomize virtual addresses up to this level. + * It returns the last physical address mapped. 
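+ *
+ * (Each PUD entry maps PUD_SIZE = 1GB. This is the granularity that the
+ * KASLR code added in arch/x86/mm/kaslr.c below relies on when it aligns
+ * each randomized region base using PUD_MASK and round_up(..., PUD_SIZE).)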
+ */ static unsigned long __meminit -phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, - unsigned long page_size_mask) +phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, + unsigned long page_size_mask) { - unsigned long pages = 0, next; - unsigned long last_map_addr = end; - int i = pud_index(addr); + unsigned long pages = 0, paddr_next; + unsigned long paddr_last = paddr_end; + unsigned long vaddr = (unsigned long)__va(paddr); + int i = pud_index(vaddr); - for (; i < PTRS_PER_PUD; i++, addr = next) { - pud_t *pud = pud_page + pud_index(addr); + for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) { + pud_t *pud; pmd_t *pmd; pgprot_t prot = PAGE_KERNEL; - next = (addr & PUD_MASK) + PUD_SIZE; - if (addr >= end) { + vaddr = (unsigned long)__va(paddr); + pud = pud_page + pud_index(vaddr); + paddr_next = (paddr & PUD_MASK) + PUD_SIZE; + + if (paddr >= paddr_end) { if (!after_bootmem && - !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) && - !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN)) + !e820_any_mapped(paddr & PUD_MASK, paddr_next, + E820_RAM) && + !e820_any_mapped(paddr & PUD_MASK, paddr_next, + E820_RESERVED_KERN)) set_pud(pud, __pud(0)); continue; } - if (pud_val(*pud)) { + if (!pud_none(*pud)) { if (!pud_large(*pud)) { pmd = pmd_offset(pud, 0); - last_map_addr = phys_pmd_init(pmd, addr, end, - page_size_mask, prot); + paddr_last = phys_pmd_init(pmd, paddr, + paddr_end, + page_size_mask, + prot); __flush_tlb_all(); continue; } @@ -493,7 +522,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, if (page_size_mask & (1 << PG_LEVEL_1G)) { if (!after_bootmem) pages++; - last_map_addr = next; + paddr_last = paddr_next; continue; } prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); @@ -503,16 +532,16 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, pages++; spin_lock(&init_mm.page_table_lock); set_pte((pte_t *)pud, - pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT, + pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); spin_unlock(&init_mm.page_table_lock); - last_map_addr = next; + paddr_last = paddr_next; continue; } pmd = alloc_low_page(); - last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, - prot); + paddr_last = phys_pmd_init(pmd, paddr, paddr_end, + page_size_mask, prot); spin_lock(&init_mm.page_table_lock); pud_populate(&init_mm, pud, pmd); @@ -522,38 +551,44 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, update_page_count(PG_LEVEL_1G, pages); - return last_map_addr; + return paddr_last; } +/* + * Create page table mapping for the physical memory for specific physical + * addresses. The virtual and physical addresses have to be aligned on PMD level + * down. It returns the last physical address mapped. 
+ */ unsigned long __meminit -kernel_physical_mapping_init(unsigned long start, - unsigned long end, +kernel_physical_mapping_init(unsigned long paddr_start, + unsigned long paddr_end, unsigned long page_size_mask) { bool pgd_changed = false; - unsigned long next, last_map_addr = end; - unsigned long addr; + unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; - start = (unsigned long)__va(start); - end = (unsigned long)__va(end); - addr = start; + paddr_last = paddr_end; + vaddr = (unsigned long)__va(paddr_start); + vaddr_end = (unsigned long)__va(paddr_end); + vaddr_start = vaddr; - for (; start < end; start = next) { - pgd_t *pgd = pgd_offset_k(start); + for (; vaddr < vaddr_end; vaddr = vaddr_next) { + pgd_t *pgd = pgd_offset_k(vaddr); pud_t *pud; - next = (start & PGDIR_MASK) + PGDIR_SIZE; + vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE; if (pgd_val(*pgd)) { pud = (pud_t *)pgd_page_vaddr(*pgd); - last_map_addr = phys_pud_init(pud, __pa(start), - __pa(end), page_size_mask); + paddr_last = phys_pud_init(pud, __pa(vaddr), + __pa(vaddr_end), + page_size_mask); continue; } pud = alloc_low_page(); - last_map_addr = phys_pud_init(pud, __pa(start), __pa(end), - page_size_mask); + paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end), + page_size_mask); spin_lock(&init_mm.page_table_lock); pgd_populate(&init_mm, pgd, pud); @@ -562,11 +597,11 @@ kernel_physical_mapping_init(unsigned long start, } if (pgd_changed) - sync_global_pgds(addr, end - 1, 0); + sync_global_pgds(vaddr_start, vaddr_end - 1, 0); __flush_tlb_all(); - return last_map_addr; + return paddr_last; } #ifndef CONFIG_NUMA @@ -673,7 +708,7 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) for (i = 0; i < PTRS_PER_PTE; i++) { pte = pte_start + i; - if (pte_val(*pte)) + if (!pte_none(*pte)) return; } @@ -691,7 +726,7 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) for (i = 0; i < PTRS_PER_PMD; i++) { pmd = pmd_start + i; - if (pmd_val(*pmd)) + if (!pmd_none(*pmd)) return; } @@ -702,27 +737,6 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) spin_unlock(&init_mm.page_table_lock); } -/* Return true if pgd is changed, otherwise return false. 
*/ -static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd) -{ - pud_t *pud; - int i; - - for (i = 0; i < PTRS_PER_PUD; i++) { - pud = pud_start + i; - if (pud_val(*pud)) - return false; - } - - /* free a pud table */ - free_pagetable(pgd_page(*pgd), 0); - spin_lock(&init_mm.page_table_lock); - pgd_clear(pgd); - spin_unlock(&init_mm.page_table_lock); - - return true; -} - static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, bool direct) @@ -913,7 +927,6 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) unsigned long addr; pgd_t *pgd; pud_t *pud; - bool pgd_changed = false; for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); @@ -924,13 +937,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) pud = (pud_t *)pgd_page_vaddr(*pgd); remove_pud_table(pud, addr, next, direct); - if (free_pud_table(pud, pgd)) - pgd_changed = true; } - if (pgd_changed) - sync_global_pgds(start, end - 1, 1); - flush_tlb_all(); } diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 1b1110fa0057..0493c17b8a51 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -54,8 +54,8 @@ static int kasan_die_handler(struct notifier_block *self, unsigned long val, void *data) { if (val == DIE_GPF) { - pr_emerg("CONFIG_KASAN_INLINE enabled"); - pr_emerg("GPF could be caused by NULL-ptr deref or user memory access"); + pr_emerg("CONFIG_KASAN_INLINE enabled\n"); + pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n"); } return NOTIFY_OK; } diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c new file mode 100644 index 000000000000..26dccd6c0df1 --- /dev/null +++ b/arch/x86/mm/kaslr.c @@ -0,0 +1,172 @@ +/* + * This file implements KASLR memory randomization for x86_64. It randomizes + * the virtual address space of kernel memory regions (physical memory + * mapping, vmalloc & vmemmap) for x86_64. This security feature mitigates + * exploits relying on predictable kernel addresses. + * + * Entropy is generated using the KASLR early boot functions now shared in + * the lib directory (originally written by Kees Cook). Randomization is + * done on PGD & PUD page table levels to increase possible addresses. The + * physical memory mapping code was adapted to support PUD level virtual + * addresses. On the best configuration, this implementation provides on + * average 30,000 possible virtual addresses for each memory region. An + * additional low memory page is used to ensure each CPU can start with a + * PGD aligned virtual address (for realmode). + * + * The order of each memory region is not changed. The feature looks at + * the available space for the regions based on different configuration + * options and randomizes the base and space between each. The size of the + * physical memory mapping is the available physical memory. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include "mm_internal.h" + +#define TB_SHIFT 40 + +/* + * Virtual address start and end range for randomization. The end changes + * based on the configuration to leave the highest amount of space for + * randomization. It increases the possible random position for each + * randomized region. + * + * You need to add an #ifdef entry if you introduce a new memory region + * compatible with KASLR. Your entry must be in logical order with memory + * layout. For example, ESPFIX is before EFI because its virtual address is + * before.
You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to + * ensure that this order is correct and won't be changed. + */ +static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; +static const unsigned long vaddr_end = VMEMMAP_START; + +/* Default values */ +unsigned long page_offset_base = __PAGE_OFFSET_BASE; +EXPORT_SYMBOL(page_offset_base); +unsigned long vmalloc_base = __VMALLOC_BASE; +EXPORT_SYMBOL(vmalloc_base); + +/* + * Memory regions randomized by KASLR (except modules, which use separate + * logic earlier during boot). The list is ordered based on virtual + * addresses. This order is kept after randomization. + */ +static __initdata struct kaslr_memory_region { + unsigned long *base; + unsigned long size_tb; +} kaslr_regions[] = { + { &page_offset_base, 64/* Maximum */ }, + { &vmalloc_base, VMALLOC_SIZE_TB }, +}; + +/* Get size in bytes used by the memory region */ +static inline unsigned long get_padding(struct kaslr_memory_region *region) +{ + return (region->size_tb << TB_SHIFT); +} + +/* + * Apply no randomization if KASLR was disabled at boot or if KASAN + * is enabled. KASAN shadow mappings rely on regions being PGD aligned. + */ +static inline bool kaslr_memory_enabled(void) +{ + return kaslr_enabled() && !config_enabled(CONFIG_KASAN); } + +/* Initialize base and padding for each memory region randomized with KASLR */ +void __init kernel_randomize_memory(void) +{ + size_t i; + unsigned long vaddr = vaddr_start; + unsigned long rand, memory_tb; + struct rnd_state rand_state; + unsigned long remain_entropy; + + if (!kaslr_memory_enabled()) + return; + + /* + * Size the physical memory mapping to the available memory and + * add padding if needed (especially for memory hotplug support). + */ + BUG_ON(kaslr_regions[0].base != &page_offset_base); + memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) + + CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING; + + /* Adapt the physical memory region size based on available memory */ + if (memory_tb < kaslr_regions[0].size_tb) + kaslr_regions[0].size_tb = memory_tb; + + /* Calculate entropy available between regions */ + remain_entropy = vaddr_end - vaddr_start; + for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) + remain_entropy -= get_padding(&kaslr_regions[i]); + + prandom_seed_state(&rand_state, kaslr_get_random_long("Memory")); + + for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) { + unsigned long entropy; + + /* + * Select a random virtual address using the extra entropy + * available. + */ + entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); + prandom_bytes_state(&rand_state, &rand, sizeof(rand)); + entropy = (rand % (entropy + 1)) & PUD_MASK; + vaddr += entropy; + *kaslr_regions[i].base = vaddr; + + /* + * Jump over the region and add minimum padding based on the + * randomization alignment. + */ + vaddr += get_padding(&kaslr_regions[i]); + vaddr = round_up(vaddr + 1, PUD_SIZE); + remain_entropy -= entropy; + } +} + +/* + * Create a PGD aligned trampoline table to allow real mode initialization + * of additional CPUs. It consumes only one low memory page.
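+ *
+ * (The loop below copies the kernel's PUD entries covering low physical
+ * memory into a dedicated page and installs that page in
+ * trampoline_pgd_entry, so the realmode trampoline sees this memory at a
+ * PGD aligned virtual base even when page_offset_base has been randomized
+ * to a PUD aligned, but not PGD aligned, address.)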
+ */ +void __meminit init_trampoline(void) +{ + unsigned long paddr, paddr_next; + pgd_t *pgd; + pud_t *pud_page, *pud_page_tramp; + int i; + + if (!kaslr_memory_enabled()) { + init_trampoline_default(); + return; + } + + pud_page_tramp = alloc_low_page(); + + paddr = 0; + pgd = pgd_offset_k((unsigned long)__va(paddr)); + pud_page = (pud_t *) pgd_page_vaddr(*pgd); + + for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) { + pud_t *pud, *pud_tramp; + unsigned long vaddr = (unsigned long)__va(paddr); + + pud_tramp = pud_page_tramp + pud_index(paddr); + pud = pud_page + pud_index(vaddr); + paddr_next = (paddr & PUD_MASK) + PUD_SIZE; + + *pud_tramp = *pud; + } + + set_pgd(&trampoline_pgd_entry, + __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); +} diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 9c086c57105c..968ac028c34e 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -1,4 +1,5 @@ /* Common code for 32 and 64-bit NUMA */ +#include #include #include #include @@ -15,7 +16,6 @@ #include #include #include -#include #include #include "numa_internal.h" diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7a1f7bbf4105..849dc09fa4f0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -101,7 +101,8 @@ static inline unsigned long highmap_start_pfn(void) static inline unsigned long highmap_end_pfn(void) { - return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; + /* Do not reference physical address outside the kernel. */ + return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT; } #endif @@ -112,6 +113,12 @@ within(unsigned long addr, unsigned long start, unsigned long end) return addr >= start && addr < end; } +static inline int +within_inclusive(unsigned long addr, unsigned long start, unsigned long end) +{ + return addr >= start && addr <= end; +} + /* * Flushing functions */ @@ -746,18 +753,6 @@ static bool try_to_free_pmd_page(pmd_t *pmd) return true; } -static bool try_to_free_pud_page(pud_t *pud) -{ - int i; - - for (i = 0; i < PTRS_PER_PUD; i++) - if (!pud_none(pud[i])) - return false; - - free_page((unsigned long)pud); - return true; -} - static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) { pte_t *pte = pte_offset_kernel(pmd, start); @@ -871,16 +866,6 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) */ } -static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end) -{ - pgd_t *pgd_entry = root + pgd_index(addr); - - unmap_pud_range(pgd_entry, addr, end); - - if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry))) - pgd_clear(pgd_entry); -} - static int alloc_pte_page(pmd_t *pmd) { pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); @@ -1113,7 +1098,12 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) ret = populate_pud(cpa, addr, pgd_entry, pgprot); if (ret < 0) { - unmap_pgd_range(cpa->pgd, addr, + /* + * Leave the PUD page in place in case some other CPU or thread + * already found it, but remove any useless entries we just + * added to it. 
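+ * (This is also why try_to_free_pud_page() and unmap_pgd_range() are
+ * removed above: freeing a PUD page that may already be in use elsewhere
+ * is not safe.)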
+ */ + unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; } @@ -1185,7 +1175,7 @@ repeat: return __cpa_process_fault(cpa, address, primary); old_pte = *kpte; - if (!pte_val(old_pte)) + if (pte_none(old_pte)) return __cpa_process_fault(cpa, address, primary); if (level == PG_LEVEL_4K) { @@ -1316,7 +1306,8 @@ static int cpa_process_alias(struct cpa_data *cpa) * to touch the high mapped kernel as well: */ if (!within(vaddr, (unsigned long)_text, _brk_end) && - within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) { + within_inclusive(cpa->pfn, highmap_start_pfn(), + highmap_end_pfn())) { unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; alias_cpa = *cpa; @@ -1991,12 +1982,6 @@ out: return retval; } -void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, - unsigned numpages) -{ - unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT)); -} - /* * The testcases use internal knowledge of the implementation that shouldn't * be exposed to the rest of the kernel. Include these directly here. diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index fb0604f11eec..db00e3e2f3dc 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -755,11 +755,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) return 1; while (cursor < to) { - if (!devmem_is_allowed(pfn)) { - pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", - current->comm, from, to - 1); + if (!devmem_is_allowed(pfn)) return 0; - } cursor += PAGE_SIZE; pfn++; } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 4eb287e25043..3feec5af4e67 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -6,7 +6,7 @@ #include #include -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | __GFP_ZERO) #ifdef CONFIG_HIGHPTE #define PGALLOC_USER_GFP __GFP_HIGHMEM @@ -18,7 +18,7 @@ gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP; pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(PGALLOC_GFP); + return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT); } pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) @@ -207,9 +207,13 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[]) { int i; bool failed = false; + gfp_t gfp = PGALLOC_GFP; + + if (mm == &init_mm) + gfp &= ~__GFP_ACCOUNT; for(i = 0; i < PREALLOCATED_PMDS; i++) { - pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP); + pmd_t *pmd = (pmd_t *)__get_free_page(gfp); if (!pmd) failed = true; if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) { diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 75cc0978d45d..e67ae0e6c59d 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -47,7 +47,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) return; } pte = pte_offset_kernel(pmd, vaddr); - if (pte_val(pteval)) + if (!pte_none(pteval)) set_pte_at(&init_mm, vaddr, pte, pteval); else pte_clear(&init_mm, vaddr, pte); diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index b5f821881465..b1ecff460a46 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include #include #include @@ -24,51 +22,6 @@ #include #include -int acpi_numa __initdata; - -static __init int setup_node(int pxm) -{ - return acpi_map_pxm_to_node(pxm); -} 
- -static __init void bad_srat(void) -{ - printk(KERN_ERR "SRAT: SRAT not used.\n"); - acpi_numa = -1; -} - -static __init inline int srat_disabled(void) -{ - return acpi_numa < 0; -} - -/* - * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for - * I/O localities since SRAT does not list them. I/O localities are - * not supported at this point. - */ -void __init acpi_numa_slit_init(struct acpi_table_slit *slit) -{ - int i, j; - - for (i = 0; i < slit->locality_count; i++) { - const int from_node = pxm_to_node(i); - - if (from_node == NUMA_NO_NODE) - continue; - - for (j = 0; j < slit->locality_count; j++) { - const int to_node = pxm_to_node(j); - - if (to_node == NUMA_NO_NODE) - continue; - - numa_set_distance(from_node, to_node, - slit->entry[slit->locality_count * i + j]); - } - } -} - /* Callback for Proximity Domain -> x2APIC mapping */ void __init acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) @@ -91,7 +44,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) pxm, apic_id); return; } - node = setup_node(pxm); + node = acpi_map_pxm_to_node(pxm); if (node < 0) { printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); bad_srat(); @@ -104,7 +57,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) } set_apicid_to_node(apic_id, node); node_set(node, numa_nodes_parsed); - acpi_numa = 1; printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node); } @@ -127,7 +79,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) pxm = pa->proximity_domain_lo; if (acpi_srat_revision >= 2) pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8; - node = setup_node(pxm); + node = acpi_map_pxm_to_node(pxm); if (node < 0) { printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); bad_srat(); @@ -146,74 +98,10 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) set_apicid_to_node(apic_id, node); node_set(node, numa_nodes_parsed); - acpi_numa = 1; printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node); } -#ifdef CONFIG_MEMORY_HOTPLUG -static inline int save_add_info(void) {return 1;} -#else -static inline int save_add_info(void) {return 0;} -#endif - -/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ -int __init -acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) -{ - u64 start, end; - u32 hotpluggable; - int node, pxm; - - if (srat_disabled()) - goto out_err; - if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) - goto out_err_bad_srat; - if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) - goto out_err; - hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; - if (hotpluggable && !save_add_info()) - goto out_err; - - start = ma->base_address; - end = start + ma->length; - pxm = ma->proximity_domain; - if (acpi_srat_revision <= 1) - pxm &= 0xff; - - node = setup_node(pxm); - if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains.\n"); - goto out_err_bad_srat; - } - - if (numa_add_memblk(node, start, end) < 0) - goto out_err_bad_srat; - - node_set(node, numa_nodes_parsed); - - pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n", - node, pxm, - (unsigned long long) start, (unsigned long long) end - 1, - hotpluggable ? " hotplug" : "", - ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : ""); - - /* Mark hotplug range in memblock. 
*/ - if (hotpluggable && memblock_mark_hotplug(start, ma->length)) - pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n", - (unsigned long long)start, (unsigned long long)end - 1); - - max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1)); - - return 0; -out_err_bad_srat: - bad_srat(); -out_err: - return -1; -} - -void __init acpi_numa_arch_fixup(void) {} - int __init x86_acpi_numa_init(void) { int ret; diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index b2a4e2a61f6b..3cd69832d7f4 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -396,6 +396,7 @@ int __init pci_acpi_init(void) return -ENODEV; printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + acpi_irq_penalty_init(); pcibios_enable_irq = acpi_pci_irq_enable; pcibios_disable_irq = acpi_pci_irq_disable; x86_init.pci.init_irq = x86_init_noop; diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 8b93e634af84..5a18aedcb341 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -36,7 +36,8 @@ #define PCIE_CAP_OFFSET 0x100 /* Quirks for the listed devices */ -#define PCI_DEVICE_ID_INTEL_MRFL_MMC 0x1190 +#define PCI_DEVICE_ID_INTEL_MRFLD_MMC 0x1190 +#define PCI_DEVICE_ID_INTEL_MRFLD_HSU 0x1191 /* Fixed BAR fields */ #define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */ @@ -224,14 +225,21 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) /* Special treatment for IRQ0 */ if (dev->irq == 0) { + /* + * Skip the HS UART common registers device since it has + * IRQ0 assigned and IRQ0 is not used by the kernel. + */ + if (dev->device == PCI_DEVICE_ID_INTEL_MRFLD_HSU) + return -EBUSY; /* * TNG has IRQ0 assigned to eMMC controller. But there * are also other devices with bogus PCI configuration * that have IRQ0 assigned. This check ensures that - * eMMC gets it. + * eMMC gets it. The rest of the devices can still be + * enabled without an interrupt line being allocated. */ - if (dev->device != PCI_DEVICE_ID_INTEL_MRFL_MMC) - return -EBUSY; + if (dev->device != PCI_DEVICE_ID_INTEL_MRFLD_MMC) + return 0; } break; default: @@ -308,14 +316,39 @@ static void pci_d3delay_fixup(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup); -static void mrst_power_off_unused_dev(struct pci_dev *dev) +static void mid_power_off_one_device(struct pci_dev *dev) { + u16 pmcsr; + + /* + * Update current state first, otherwise PCI core enforces PCI_D0 in + * pci_set_power_state() for devices whose status was PCI_UNKNOWN. + */ + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); + dev->current_state = (pci_power_t __force)(pmcsr & PCI_PM_CTRL_STATE_MASK); + pci_set_power_state(dev, PCI_D3hot); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); + +static void mid_power_off_devices(struct pci_dev *dev) +{ + int id; + + if (!pci_soc_mode) + return; + + id = intel_mid_pwr_get_lss_id(dev); + if (id < 0) + return; + + /* + * This sets only PMCSR bits. The actual power off will happen in + * arch/x86/platform/intel-mid/pwr.c. + */ + mid_power_off_one_device(dev); +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, mid_power_off_devices); /* * Langwell devices reside at fixed offsets, don't try to move them.
diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index 7792aba266df..613cac7395c4 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -195,7 +195,7 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, vmdirq->virq = virq; irq_domain_set_info(domain, virq, vmdirq->irq->vmd_vector, info->chip, - vmdirq, handle_simple_irq, vmd, NULL); + vmdirq, handle_untracked_irq, vmd, NULL); return 0; } diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c index 81c769e80614..8ff7b9355416 100644 --- a/arch/x86/platform/atom/punit_atom_debug.c +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -23,10 +23,9 @@ #include #include #include +#include #include -/* Power gate status reg */ -#define PWRGT_STATUS 0x61 /* Subsystem config/status Video processor */ #define VED_SS_PM0 0x32 /* Subsystem config/status ISP (Image Signal Processor) */ @@ -35,12 +34,16 @@ #define MIO_SS_PM 0x3B /* Shift bits for getting status for video, isp and i/o */ #define SSS_SHIFT 24 + +/* Power gate status reg */ +#define PWRGT_STATUS 0x61 /* Shift bits for getting status for graphics rendering */ #define RENDER_POS 0 /* Shift bits for getting status for media control */ #define MEDIA_POS 2 /* Shift bits for getting status for Valley View/Baytrail display */ #define VLV_DISPLAY_POS 6 + /* Subsystem config/status display for Cherry Trail SOC */ #define CHT_DSP_SSS 0x36 /* Shift bits for getting status for display */ @@ -52,6 +55,14 @@ struct punit_device { int sss_pos; }; +static const struct punit_device punit_device_tng[] = { + { "DISPLAY", CHT_DSP_SSS, SSS_SHIFT }, + { "VED", VED_SS_PM0, SSS_SHIFT }, + { "ISP", ISP_SS_PM0, SSS_SHIFT }, + { "MIO", MIO_SS_PM, SSS_SHIFT }, + { NULL } +}; + static const struct punit_device punit_device_byt[] = { { "GFX RENDER", PWRGT_STATUS, RENDER_POS }, { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS }, @@ -143,8 +154,9 @@ static void punit_dbgfs_unregister(void) (kernel_ulong_t)&drv_data } static const struct x86_cpu_id intel_punit_cpu_ids[] = { - ICPU(55, punit_device_byt), /* Valleyview, Bay Trail */ - ICPU(76, punit_device_cht), /* Braswell, Cherry Trail */ + ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt), + ICPU(INTEL_FAM6_ATOM_MERRIFIELD1, punit_device_tng), + ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht), {} }; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f93545e7dc54..17c8bbd4e2f0 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -98,21 +98,6 @@ static efi_status_t __init phys_efi_set_virtual_address_map( return status; } -void efi_get_time(struct timespec *now) -{ - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - - status = efi.get_time(&eft, &cap); - if (status != EFI_SUCCESS) - pr_err("Oops: efitime: can't read time!\n"); - - now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour, - eft.minute, eft.second); - now->tv_nsec = 0; -} - void __init efi_find_mirror(void) { efi_memory_desc_t *md; @@ -978,8 +963,6 @@ static void __init __efi_enter_virtual_mode(void) * EFI mixed mode we need all of memory to be accessible when * we pass parameters to the EFI runtime services in the * thunking code. 
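 *
 * (The commented-out efi_cleanup_page_tables() call below is now dropped
 * entirely; its efi_32.c and efi_64.c implementations are removed in the
 * hunks that follow, so the EFI page tables simply stay in place.)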
- * - * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); */ free_pages((unsigned long)new_memmap, pg_shift); diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 338402b91d2e..cef39b097649 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -49,9 +49,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { return 0; } -void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) -{ -} void __init efi_map_region(efi_memory_desc_t *md) { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 6e7242be1c87..3e12c44f88a2 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -139,7 +139,7 @@ int __init efi_alloc_page_tables(void) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO; + gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; efi_pgd = (pgd_t *)__get_free_page(gfp_mask); if (!efi_pgd) return -ENOMEM; @@ -285,11 +285,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; } -void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) -{ - kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages); -} - static void __init __map_region(efi_memory_desc_t *md, u64 va) { unsigned long flags = _PAGE_RW; @@ -466,22 +461,17 @@ extern efi_status_t efi64_thunk(u32, ...); #define efi_thunk(f, ...) \ ({ \ efi_status_t __s; \ - unsigned long flags; \ - u32 func; \ - \ - efi_sync_low_kernel_mappings(); \ - local_irq_save(flags); \ + unsigned long __flags; \ + u32 __func; \ \ - efi_scratch.prev_cr3 = read_cr3(); \ - write_cr3((unsigned long)efi_scratch.efi_pgt); \ - __flush_tlb_all(); \ + local_irq_save(__flags); \ + arch_efi_call_virt_setup(); \ \ - func = runtime_service32(f); \ - __s = efi64_thunk(func, __VA_ARGS__); \ + __func = runtime_service32(f); \ + __s = efi64_thunk(__func, __VA_ARGS__); \ \ - write_cr3(efi_scratch.prev_cr3); \ - __flush_tlb_all(); \ - local_irq_restore(flags); \ + arch_efi_call_virt_teardown(); \ + local_irq_restore(__flags); \ \ __s; \ }) diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index 0ce1b1913673..fa021dfab088 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o +obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfld.o pwr.o # SFI specific code ifdef CONFIG_X86_INTEL_MID diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 91ec9f8704bf..fc135bf70511 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -1,3 +1,5 @@ +# Family-Level Interface Shim (FLIS) +obj-$(subst m,y,$(CONFIG_PINCTRL_MERRIFIELD)) += platform_mrfld_pinctrl.o # IPC Devices obj-y += platform_ipc.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic.o @@ -8,14 +10,18 @@ obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_battery.o obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o +# SPI Devices +obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o # I2C Devices obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += 
platform_emc1403.o obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o -obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_max7315.o obj-$(subst m,y,$(CONFIG_INPUT_MPU3050)) += platform_mpu3050.o obj-$(subst m,y,$(CONFIG_INPUT_BMA150)) += platform_bma023.o -obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o obj-$(subst m,y,$(CONFIG_DRM_MEDFIELD)) += platform_tc35876x.o +# I2C GPIO Expanders +obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_max7315.o +obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o +obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c new file mode 100644 index 000000000000..4de8a664e6a1 --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c @@ -0,0 +1,43 @@ +/* + * Intel Merrifield FLIS platform device initialization file + * + * Copyright (C) 2016, Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include + +#include + +#define FLIS_BASE_ADDR 0xff0c0000 +#define FLIS_LENGTH 0x8000 + +static struct resource mrfld_pinctrl_mmio_resource = { + .start = FLIS_BASE_ADDR, + .end = FLIS_BASE_ADDR + FLIS_LENGTH - 1, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device mrfld_pinctrl_device = { + .name = "pinctrl-merrifield", + .id = PLATFORM_DEVID_NONE, + .resource = &mrfld_pinctrl_mmio_resource, + .num_resources = 1, +}; + +static int __init mrfld_pinctrl_init(void) +{ + if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) + return platform_device_register(&mrfld_pinctrl_device); + + return -ENODEV; +} +arch_initcall(mrfld_pinctrl_init); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c b/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c new file mode 100644 index 000000000000..429a94192671 --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c @@ -0,0 +1,99 @@ +/* + * PCAL9555a platform data initialization file + * + * Copyright (C) 2016, Intel Corporation + * + * Authors: Andy Shevchenko + * Dan O'Donovan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License.
+ */ + +#include +#include +#include +#include +#include + +#include + +#define PCAL9555A_NUM 4 + +static struct pca953x_platform_data pcal9555a_pdata[PCAL9555A_NUM]; +static int nr; + +static void __init *pcal9555a_platform_data(void *info) +{ + struct i2c_board_info *i2c_info = info; + char *type = i2c_info->type; + struct pca953x_platform_data *pcal9555a; + char base_pin_name[SFI_NAME_LEN + 1]; + char intr_pin_name[SFI_NAME_LEN + 1]; + int gpio_base, intr; + + snprintf(base_pin_name, sizeof(base_pin_name), "%s_base", type); + snprintf(intr_pin_name, sizeof(intr_pin_name), "%s_int", type); + + gpio_base = get_gpio_by_name(base_pin_name); + intr = get_gpio_by_name(intr_pin_name); + + /* Check if the SFI record is valid */ + if (gpio_base == -1) + return NULL; + + if (nr >= PCAL9555A_NUM) { + pr_err("%s: Too many instances, only %d supported\n", __func__, + PCAL9555A_NUM); + return NULL; + } + + pcal9555a = &pcal9555a_pdata[nr++]; + pcal9555a->gpio_base = gpio_base; + + if (intr >= 0) { + i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; + pcal9555a->irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; + } else { + i2c_info->irq = -1; + pcal9555a->irq_base = -1; + } + + strcpy(type, "pcal9555a"); + return pcal9555a; +} + +static const struct devs_id pcal9555a_1_dev_id __initconst = { + .name = "pcal9555a-1", + .type = SFI_DEV_TYPE_I2C, + .delay = 1, + .get_platform_data = &pcal9555a_platform_data, +}; + +static const struct devs_id pcal9555a_2_dev_id __initconst = { + .name = "pcal9555a-2", + .type = SFI_DEV_TYPE_I2C, + .delay = 1, + .get_platform_data = &pcal9555a_platform_data, +}; + +static const struct devs_id pcal9555a_3_dev_id __initconst = { + .name = "pcal9555a-3", + .type = SFI_DEV_TYPE_I2C, + .delay = 1, + .get_platform_data = &pcal9555a_platform_data, +}; + +static const struct devs_id pcal9555a_4_dev_id __initconst = { + .name = "pcal9555a-4", + .type = SFI_DEV_TYPE_I2C, + .delay = 1, + .get_platform_data = &pcal9555a_platform_data, +}; + +sfi_device(pcal9555a_1_dev_id); +sfi_device(pcal9555a_2_dev_id); +sfi_device(pcal9555a_3_dev_id); +sfi_device(pcal9555a_4_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_spidev.c new file mode 100644 index 000000000000..30c601b399ee --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_spidev.c @@ -0,0 +1,50 @@ +/* + * spidev platform data initialization file + * + * (C) Copyright 2014, 2016 Intel Corporation + * Authors: Andy Shevchenko + * Dan O'Donovan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License.
+ */ + +#include +#include +#include +#include + +#include + +#define MRFLD_SPI_DEFAULT_DMA_BURST 8 +#define MRFLD_SPI_DEFAULT_TIMEOUT 500 + +/* GPIO pin for spidev chipselect */ +#define MRFLD_SPIDEV_GPIO_CS 111 + +static struct pxa2xx_spi_chip spidev_spi_chip = { + .dma_burst_size = MRFLD_SPI_DEFAULT_DMA_BURST, + .timeout = MRFLD_SPI_DEFAULT_TIMEOUT, + .gpio_cs = MRFLD_SPIDEV_GPIO_CS, +}; + +static void __init *spidev_platform_data(void *info) +{ + struct spi_board_info *spi_info = info; + + spi_info->mode = SPI_MODE_0; + spi_info->controller_data = &spidev_spi_chip; + + return NULL; +} + +static const struct devs_id spidev_dev_id __initconst = { + .name = "spidev", + .type = SFI_DEV_TYPE_SPI, + .delay = 0, + .get_platform_data = &spidev_platform_data, +}; + +sfi_device(spidev_dev_id); diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 90bb997ed0a2..abbf49c6e9d3 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -144,6 +145,15 @@ static void intel_mid_arch_setup(void) out: if (intel_mid_ops->arch_setup) intel_mid_ops->arch_setup(); + + /* + * Intel MID platforms are using explicitly defined regulators. + * + * Let the regulator core know that we do not have any additional + * regulators left. This lets it substitute unprovided regulators with + * dummy ones: + */ + regulator_has_full_constraints(); } /* MID systems don't have i8042 controller */ diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfld.c similarity index 97% rename from arch/x86/platform/intel-mid/mrfl.c rename to arch/x86/platform/intel-mid/mrfld.c index bd1adc621781..59253db41bbc 100644 --- a/arch/x86/platform/intel-mid/mrfl.c +++ b/arch/x86/platform/intel-mid/mrfld.c @@ -1,5 +1,5 @@ /* - * mrfl.c: Intel Merrifield platform specific setup code + * Intel Merrifield platform specific setup code * * (C) Copyright 2013 Intel Corporation * diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c new file mode 100644 index 000000000000..5bc90dd102d4 --- /dev/null +++ b/arch/x86/platform/intel-mid/pwr.c @@ -0,0 +1,418 @@ +/* + * Intel MID Power Management Unit (PWRMU) device driver + * + * Copyright (C) 2016, Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * Intel MID Power Management Unit device driver handles the South Complex PCI + * devices such as GPDMA, SPI, I2C, PWM, and so on. By default PCI core + * modifies bits in PMCSR register in the PCI configuration space. This is not + * enough on some SoCs like Intel Tangier. In such case PCI core sets a new + * power state of the device in question through a PM hook registered in struct + * pci_platform_pm_ops (see drivers/pci/pci-mid.c). 
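+ *
+ * A minimal sketch of such a hook (illustrative only; the actual
+ * pci_platform_pm_ops instance lives in drivers/pci/pci-mid.c and is
+ * not part of this hunk):
+ *
+ *	static int mid_pci_set_power_state(struct pci_dev *pdev,
+ *					   pci_power_t state)
+ *	{
+ *		return intel_mid_pci_set_power_state(pdev, state);
+ *	}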
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Registers */ +#define PM_STS 0x00 +#define PM_CMD 0x04 +#define PM_ICS 0x08 +#define PM_WKC(x) (0x10 + (x) * 4) +#define PM_WKS(x) (0x18 + (x) * 4) +#define PM_SSC(x) (0x20 + (x) * 4) +#define PM_SSS(x) (0x30 + (x) * 4) + +/* Bits in PM_STS */ +#define PM_STS_BUSY (1 << 8) + +/* Bits in PM_CMD */ +#define PM_CMD_CMD(x) ((x) << 0) +#define PM_CMD_IOC (1 << 8) +#define PM_CMD_D3cold (1 << 21) + +/* List of commands */ +#define CMD_SET_CFG 0x01 + +/* Bits in PM_ICS */ +#define PM_ICS_INT_STATUS(x) ((x) & 0xff) +#define PM_ICS_IE (1 << 8) +#define PM_ICS_IP (1 << 9) +#define PM_ICS_SW_INT_STS (1 << 10) + +/* List of interrupts */ +#define INT_INVALID 0 +#define INT_CMD_COMPLETE 1 +#define INT_CMD_ERR 2 +#define INT_WAKE_EVENT 3 +#define INT_LSS_POWER_ERR 4 +#define INT_S0iX_MSG_ERR 5 +#define INT_NO_C6 6 +#define INT_TRIGGER_ERR 7 +#define INT_INACTIVITY 8 + +/* South Complex devices */ +#define LSS_MAX_SHARED_DEVS 4 +#define LSS_MAX_DEVS 64 + +#define LSS_WS_BITS 1 /* wake state width */ +#define LSS_PWS_BITS 2 /* power state width */ + +/* Supported device IDs */ +#define PCI_DEVICE_ID_PENWELL 0x0828 +#define PCI_DEVICE_ID_TANGIER 0x11a1 + +struct mid_pwr_dev { + struct pci_dev *pdev; + pci_power_t state; +}; + +struct mid_pwr { + struct device *dev; + void __iomem *regs; + int irq; + bool available; + + struct mutex lock; + struct mid_pwr_dev lss[LSS_MAX_DEVS][LSS_MAX_SHARED_DEVS]; +}; + +static struct mid_pwr *midpwr; + +static u32 mid_pwr_get_state(struct mid_pwr *pwr, int reg) +{ + return readl(pwr->regs + PM_SSS(reg)); +} + +static void mid_pwr_set_state(struct mid_pwr *pwr, int reg, u32 value) +{ + writel(value, pwr->regs + PM_SSC(reg)); +} + +static void mid_pwr_set_wake(struct mid_pwr *pwr, int reg, u32 value) +{ + writel(value, pwr->regs + PM_WKC(reg)); +} + +static void mid_pwr_interrupt_disable(struct mid_pwr *pwr) +{ + writel(~PM_ICS_IE, pwr->regs + PM_ICS); +} + +static bool mid_pwr_is_busy(struct mid_pwr *pwr) +{ + return !!(readl(pwr->regs + PM_STS) & PM_STS_BUSY); +} + +/* Wait up to 500ms for the latest PWRMU command to finish */ +static int mid_pwr_wait(struct mid_pwr *pwr) +{ + unsigned int count = 500000; + bool busy; + + do { + busy = mid_pwr_is_busy(pwr); + if (!busy) + return 0; + udelay(1); + } while (--count); + + return -EBUSY; +} + +static int mid_pwr_wait_for_cmd(struct mid_pwr *pwr, u8 cmd) +{ + writel(PM_CMD_CMD(cmd), pwr->regs + PM_CMD); + return mid_pwr_wait(pwr); +} + +static int __update_power_state(struct mid_pwr *pwr, int reg, int bit, int new) +{ + int curstate; + u32 power; + int ret; + + /* Check if the device is already in desired state */ + power = mid_pwr_get_state(pwr, reg); + curstate = (power >> bit) & 3; + if (curstate == new) + return 0; + + /* Update the power state */ + mid_pwr_set_state(pwr, reg, (power & ~(3 << bit)) | (new << bit)); + + /* Send command to SCU */ + ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG); + if (ret) + return ret; + + /* Check if the device is already in desired state */ + power = mid_pwr_get_state(pwr, reg); + curstate = (power >> bit) & 3; + if (curstate != new) + return -EAGAIN; + + return 0; +} + +static pci_power_t __find_weakest_power_state(struct mid_pwr_dev *lss, + struct pci_dev *pdev, + pci_power_t state) +{ + pci_power_t weakest = PCI_D3hot; + unsigned int j; + + /* Find device in cache or first free cell */ + for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) { + if
(lss[j].pdev == pdev || !lss[j].pdev) + break; + } + + /* Store the desired state in cache */ + if (j < LSS_MAX_SHARED_DEVS) { + lss[j].pdev = pdev; + lss[j].state = state; + } else { + dev_WARN(&pdev->dev, "No room for device in PWRMU LSS cache\n"); + weakest = state; + } + + /* Find the power state we may use */ + for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) { + if (lss[j].state < weakest) + weakest = lss[j].state; + } + + return weakest; +} + +static int __set_power_state(struct mid_pwr *pwr, struct pci_dev *pdev, + pci_power_t state, int id, int reg, int bit) +{ + const char *name; + int ret; + + state = __find_weakest_power_state(pwr->lss[id], pdev, state); + name = pci_power_name(state); + + ret = __update_power_state(pwr, reg, bit, (__force int)state); + if (ret) { + dev_warn(&pdev->dev, "Can't set power state %s: %d\n", name, ret); + return ret; + } + + dev_vdbg(&pdev->dev, "Set power state %s\n", name); + return 0; +} + +static int mid_pwr_set_power_state(struct mid_pwr *pwr, struct pci_dev *pdev, + pci_power_t state) +{ + int id, reg, bit; + int ret; + + id = intel_mid_pwr_get_lss_id(pdev); + if (id < 0) + return id; + + reg = (id * LSS_PWS_BITS) / 32; + bit = (id * LSS_PWS_BITS) % 32; + + /* We support states between PCI_D0 and PCI_D3hot */ + if (state < PCI_D0) + state = PCI_D0; + if (state > PCI_D3hot) + state = PCI_D3hot; + + mutex_lock(&pwr->lock); + ret = __set_power_state(pwr, pdev, state, id, reg, bit); + mutex_unlock(&pwr->lock); + return ret; +} + +int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) +{ + struct mid_pwr *pwr = midpwr; + int ret = 0; + + might_sleep(); + + if (pwr && pwr->available) + ret = mid_pwr_set_power_state(pwr, pdev, state); + dev_vdbg(&pdev->dev, "set_power_state() returns %d\n", ret); + + return 0; +} +EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state); + +int intel_mid_pwr_get_lss_id(struct pci_dev *pdev) +{ + int vndr; + u8 id; + + /* + * Mapping to PWRMU index is kept in the Logical SubSystem ID byte of + * Vendor capability. 
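+ *
+ * Decoding below: one flag (INTEL_MID_PWR_LSS_TYPE, defined in
+ * asm/intel-mid.h and assumed here to be the top bit of the byte)
+ * marks the entry as an LSS record, and the remaining bits carry the
+ * LSS id, which must stay below LSS_MAX_DEVS. E.g. a raw byte of
+ * 0x94 would decode to LSS id 0x14 == 20.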
+ */ + vndr = pci_find_capability(pdev, PCI_CAP_ID_VNDR); + if (!vndr) + return -EINVAL; + + /* Read the Logical SubSystem ID byte */ + pci_read_config_byte(pdev, vndr + INTEL_MID_PWR_LSS_OFFSET, &id); + if (!(id & INTEL_MID_PWR_LSS_TYPE)) + return -ENODEV; + + id &= ~INTEL_MID_PWR_LSS_TYPE; + if (id >= LSS_MAX_DEVS) + return -ERANGE; + + return id; +} + +static irqreturn_t mid_pwr_irq_handler(int irq, void *dev_id) +{ + struct mid_pwr *pwr = dev_id; + u32 ics; + + ics = readl(pwr->regs + PM_ICS); + if (!(ics & PM_ICS_IP)) + return IRQ_NONE; + + writel(ics | PM_ICS_IP, pwr->regs + PM_ICS); + + dev_warn(pwr->dev, "Unexpected IRQ: %#x\n", PM_ICS_INT_STATUS(ics)); + return IRQ_HANDLED; +} + +struct mid_pwr_device_info { + int (*set_initial_state)(struct mid_pwr *pwr); +}; + +static int mid_pwr_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct mid_pwr_device_info *info = (void *)id->driver_data; + struct device *dev = &pdev->dev; + struct mid_pwr *pwr; + int ret; + + ret = pcim_enable_device(pdev); + if (ret < 0) { + dev_err(&pdev->dev, "error: could not enable device\n"); + return ret; + } + + ret = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev)); + if (ret) { + dev_err(&pdev->dev, "I/O memory remapping failed\n"); + return ret; + } + + pwr = devm_kzalloc(dev, sizeof(*pwr), GFP_KERNEL); + if (!pwr) + return -ENOMEM; + + pwr->dev = dev; + pwr->regs = pcim_iomap_table(pdev)[0]; + pwr->irq = pdev->irq; + + mutex_init(&pwr->lock); + + /* Disable interrupts */ + mid_pwr_interrupt_disable(pwr); + + if (info && info->set_initial_state) { + ret = info->set_initial_state(pwr); + if (ret) + dev_warn(dev, "Can't set initial state: %d\n", ret); + } + + ret = devm_request_irq(dev, pdev->irq, mid_pwr_irq_handler, + IRQF_NO_SUSPEND, pci_name(pdev), pwr); + if (ret) + return ret; + + pwr->available = true; + midpwr = pwr; + + pci_set_drvdata(pdev, pwr); + return 0; +} + +static int mid_set_initial_state(struct mid_pwr *pwr) +{ + unsigned int i, j; + int ret; + + /* + * Enable wake events. + * + * PWRMU supports up to 32 sources for waking up the system. Ungate them + * all here. + */ + mid_pwr_set_wake(pwr, 0, 0xffffffff); + mid_pwr_set_wake(pwr, 1, 0xffffffff); + + /* + * Power off South Complex devices. + * + * There is a map (see a note below) of 64 devices with 2 bits per each + * on 32-bit HW registers. The following calls set all devices to one + * known initial state, i.e. PCI_D3hot. This is done in conjunction + * with PMCSR setting in arch/x86/pci/intel_mid_pci.c. + * + * NOTE: The actual device mapping is provided by a platform at run + * time using vendor capability of PCI configuration space.
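+ *
+ * For reference, the id-to-register arithmetic used by
+ * mid_pwr_set_power_state() above is
+ *
+ *	reg = (id * LSS_PWS_BITS) / 32;
+ *	bit = (id * LSS_PWS_BITS) % 32;
+ *
+ * so LSS id 20, for example, lands in register 1, bits 9:8.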
+ */ + mid_pwr_set_state(pwr, 0, 0xffffffff); + mid_pwr_set_state(pwr, 1, 0xffffffff); + mid_pwr_set_state(pwr, 2, 0xffffffff); + mid_pwr_set_state(pwr, 3, 0xffffffff); + + /* Send command to SCU */ + ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG); + if (ret) + return ret; + + for (i = 0; i < LSS_MAX_DEVS; i++) { + for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) + pwr->lss[i][j].state = PCI_D3hot; + } + + return 0; +} + +static const struct mid_pwr_device_info mid_info = { + .set_initial_state = mid_set_initial_state, +}; + +static const struct pci_device_id mid_pwr_pci_ids[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PENWELL), (kernel_ulong_t)&mid_info }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&mid_info }, + {} +}; +MODULE_DEVICE_TABLE(pci, mid_pwr_pci_ids); + +static struct pci_driver mid_pwr_pci_driver = { + .name = "intel_mid_pwr", + .probe = mid_pwr_probe, + .id_table = mid_pwr_pci_ids, +}; + +builtin_pci_driver(mid_pwr_pci_driver); diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 5ee360a951ce..1555672d436f 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -407,6 +407,32 @@ static void __init sfi_handle_i2c_dev(struct sfi_device_table_entry *pentry, i2c_register_board_info(pentry->host_num, &i2c_info, 1); } +static void __init sfi_handle_sd_dev(struct sfi_device_table_entry *pentry, + struct devs_id *dev) +{ + struct mid_sd_board_info sd_info; + void *pdata; + + memset(&sd_info, 0, sizeof(sd_info)); + strncpy(sd_info.name, pentry->name, SFI_NAME_LEN); + sd_info.bus_num = pentry->host_num; + sd_info.max_clk = pentry->max_freq; + sd_info.addr = pentry->addr; + pr_debug("SD bus = %d, name = %16.16s, max_clk = %d, addr = 0x%x\n", + sd_info.bus_num, + sd_info.name, + sd_info.max_clk, + sd_info.addr); + pdata = intel_mid_sfi_get_pdata(dev, &sd_info); + if (IS_ERR(pdata)) + return; + + /* Nothing we can do with this for now */ + sd_info.platform_data = pdata; + + pr_debug("Successfully registered %16.16s", sd_info.name); +} + extern struct devs_id *const __x86_intel_mid_dev_start[], *const __x86_intel_mid_dev_end[]; @@ -490,6 +516,9 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) case SFI_DEV_TYPE_I2C: sfi_handle_i2c_dev(pentry, dev); break; + case SFI_DEV_TYPE_SD: + sfi_handle_sd_dev(pentry, dev); + break; case SFI_DEV_TYPE_UART: case SFI_DEV_TYPE_HSI: default: diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 815fec6e05e2..66b2166ea4a1 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -40,8 +40,7 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) */ return BIOS_STATUS_UNIMPLEMENTED; - ret = efi_call((void *)__va(tab->function), (u64)which, - a1, a2, a3, a4, a5); + ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); return ret; } EXPORT_SYMBOL_GPL(uv_bios_call); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index d5f64996394a..b12c26e2e309 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -266,6 +267,35 @@ void notrace restore_processor_state(void) EXPORT_SYMBOL(restore_processor_state); #endif +#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HOTPLUG_CPU) +static void resume_play_dead(void) +{ + play_dead_common(); + tboot_shutdown(TB_SHUTDOWN_WFS); + hlt_play_dead(); +} + +int hibernate_resume_nonboot_cpu_disable(void) +{ + void (*play_dead)(void) = 
smp_ops.play_dead; + int ret; + + /* + * Ensure that MONITOR/MWAIT will not be used in the "play dead" loop + * during hibernate image restoration, because it is likely that the + * monitored address will be actually written to at that time and then + * the "dead" CPU will attempt to execute instructions again, but the + * address in its instruction pointer may not be possible to resolve + * any more at that point (the page tables used by it previously may + * have been overwritten by hibernate image data). + */ + smp_ops.play_dead = resume_play_dead; + ret = disable_nonboot_cpus(); + smp_ops.play_dead = play_dead; + return ret; +} +#endif + /* * When bsp_check() is called in hibernate and suspend, cpu hotplug * is disabled already. So it's unnessary to handle race condition between diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 009947d419a6..f2b5e6a5cf95 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -19,6 +19,7 @@ #include #include #include +#include /* Defined in hibernate_asm_64.S */ extern asmlinkage __visible int restore_image(void); @@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_image(void); * kernel's text (this value is passed in the image header). */ unsigned long restore_jump_address __visible; +unsigned long jump_address_phys; /* * Value of the cr3 register from before the hibernation (this value is passed @@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible; pgd_t *temp_level4_pgt __visible; -void *relocated_restore_code __visible; +unsigned long relocated_restore_code __visible; + +static int set_up_temporary_text_mapping(void) +{ + pmd_t *pmd; + pud_t *pud; + + /* + * The new mapping only has to cover the page containing the image + * kernel's entry point (jump_address_phys), because the switch over to + * it is carried out by relocated code running from a page allocated + * specifically for this purpose and covered by the identity mapping, so + * the temporary kernel text mapping is only needed for the final jump. + * Moreover, in that mapping the virtual address of the image kernel's + * entry point must be the same as its virtual address in the image + * kernel (restore_jump_address), so the image kernel's + * restore_registers() code doesn't find itself in a different area of + * the virtual address space after switching over to the original page + * tables used by the image kernel. 
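+ *
+ * Concretely, the code below allocates one temporary PUD page and one
+ * temporary PMD page, then installs a single executable PMD-sized (2M)
+ * large page so that
+ *
+ *	restore_jump_address -> (jump_address_phys & PMD_MASK)
+ *
+ * with __PAGE_KERNEL_LARGE_EXEC permissions.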
+ */ + pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!pud) + return -ENOMEM; + + pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!pmd) + return -ENOMEM; + + set_pmd(pmd + pmd_index(restore_jump_address), + __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC)); + set_pud(pud + pud_index(restore_jump_address), + __pud(__pa(pmd) | _KERNPG_TABLE)); + set_pgd(temp_level4_pgt + pgd_index(restore_jump_address), + __pgd(__pa(pud) | _KERNPG_TABLE)); + + return 0; +} static void *alloc_pgt_page(void *context) { @@ -59,9 +97,10 @@ static int set_up_temporary_mappings(void) if (!temp_level4_pgt) return -ENOMEM; - /* It is safe to reuse the original kernel mapping */ - set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), - init_level4_pgt[pgd_index(__START_KERNEL_map)]); + /* Prepare a temporary mapping for the kernel text */ + result = set_up_temporary_text_mapping(); + if (result) + return result; /* Set up the direct mapping from scratch */ for (i = 0; i < nr_pfn_mapped; i++) { @@ -78,19 +117,50 @@ static int set_up_temporary_mappings(void) return 0; } +static int relocate_restore_code(void) +{ + pgd_t *pgd; + pud_t *pud; + + relocated_restore_code = get_safe_page(GFP_ATOMIC); + if (!relocated_restore_code) + return -ENOMEM; + + memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE); + + /* Make the page containing the relocated code executable */ + pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code); + pud = pud_offset(pgd, relocated_restore_code); + if (pud_large(*pud)) { + set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); + } else { + pmd_t *pmd = pmd_offset(pud, relocated_restore_code); + + if (pmd_large(*pmd)) { + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX)); + } else { + pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code); + + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX)); + } + } + __flush_tlb_all(); + + return 0; +} + int swsusp_arch_resume(void) { int error; /* We have got enough memory and from now on we cannot recover */ - if ((error = set_up_temporary_mappings())) + error = set_up_temporary_mappings(); + if (error) return error; - relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC); - if (!relocated_restore_code) - return -ENOMEM; - memcpy(relocated_restore_code, &core_restore_code, - &restore_registers - &core_restore_code); + error = relocate_restore_code(); + if (error) + return error; restore_image(); return 0; @@ -109,11 +179,12 @@ int pfn_is_nosave(unsigned long pfn) struct restore_data_record { unsigned long jump_address; + unsigned long jump_address_phys; unsigned long cr3; unsigned long magic; }; -#define RESTORE_MAGIC 0x0123456789ABCDEFUL +#define RESTORE_MAGIC 0x123456789ABCDEF0UL /** * arch_hibernation_header_save - populate the architecture specific part @@ -126,7 +197,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) if (max_size < sizeof(struct restore_data_record)) return -EOVERFLOW; - rdr->jump_address = restore_jump_address; + rdr->jump_address = (unsigned long)&restore_registers; + rdr->jump_address_phys = __pa_symbol(&restore_registers); rdr->cr3 = restore_cr3; rdr->magic = RESTORE_MAGIC; return 0; @@ -142,6 +214,7 @@ int arch_hibernation_header_restore(void *addr) struct restore_data_record *rdr = addr; restore_jump_address = rdr->jump_address; + jump_address_phys = rdr->jump_address_phys; restore_cr3 = rdr->cr3; return (rdr->magic == RESTORE_MAGIC) ? 
0 : -EINVAL; } diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 4400a43b9e28..3177c2bc26f6 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -44,9 +44,6 @@ ENTRY(swsusp_arch_suspend) pushfq popq pt_regs_flags(%rax) - /* save the address of restore_registers */ - movq $restore_registers, %rax - movq %rax, restore_jump_address(%rip) /* save cr3 */ movq %cr3, %rax movq %rax, restore_cr3(%rip) @@ -57,31 +54,34 @@ ENTRY(swsusp_arch_suspend) ENDPROC(swsusp_arch_suspend) ENTRY(restore_image) - /* switch to temporary page tables */ - movq $__PAGE_OFFSET, %rdx - movq temp_level4_pgt(%rip), %rax - subq %rdx, %rax - movq %rax, %cr3 - /* Flush TLB */ - movq mmu_cr4_features(%rip), %rax - movq %rax, %rdx - andq $~(X86_CR4_PGE), %rdx - movq %rdx, %cr4; # turn off PGE - movq %cr3, %rcx; # flush TLB - movq %rcx, %cr3; - movq %rax, %cr4; # turn PGE back on - /* prepare to jump to the image kernel */ - movq restore_jump_address(%rip), %rax - movq restore_cr3(%rip), %rbx + movq restore_jump_address(%rip), %r8 + movq restore_cr3(%rip), %r9 + + /* prepare to switch to temporary page tables */ + movq temp_level4_pgt(%rip), %rax + movq mmu_cr4_features(%rip), %rbx /* prepare to copy image data to their original locations */ movq restore_pblist(%rip), %rdx + + /* jump to relocated restore code */ movq relocated_restore_code(%rip), %rcx jmpq *%rcx /* code below has been relocated to a safe page */ ENTRY(core_restore_code) + /* switch to temporary page tables */ + movq $__PAGE_OFFSET, %rcx + subq %rcx, %rax + movq %rax, %cr3 + /* flush TLB */ + movq %rbx, %rcx + andq $~(X86_CR4_PGE), %rcx + movq %rcx, %cr4; # turn off PGE + movq %cr3, %rcx; # flush TLB + movq %rcx, %cr3; + movq %rbx, %cr4; # turn PGE back on .Lloop: testq %rdx, %rdx jz .Ldone @@ -96,24 +96,17 @@ ENTRY(core_restore_code) /* progress to the next pbe */ movq pbe_next(%rdx), %rdx jmp .Lloop + .Ldone: /* jump to the restore_registers address from the image header */ - jmpq *%rax - /* - * NOTE: This assumes that the boot kernel's text mapping covers the - * image kernel's page containing restore_registers and the address of - * this page is the same as in the image kernel's text mapping (it - * should always be true, because the text mapping is linear, starting - * from 0, and is supposed to cover the entire kernel text for every - * kernel). 
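 *
 * (For contrast, the rewritten sequence above no longer relies on that
 * assumption: it parks restore_jump_address in %r8 and restore_cr3 in
 * %r9, registers the relocated copy loop never touches, and finishes
 * with jmpq *%r8 under the temporary text mapping set up in
 * hibernate_64.c.)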
- * - * code below belongs to the image kernel - */ + jmpq *%r8 + /* code below belongs to the image kernel */ + .align PAGE_SIZE ENTRY(restore_registers) FRAME_BEGIN /* go back to the original page tables */ - movq %rbx, %cr3 + movq %r9, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movq mmu_cr4_features(%rip), %rax diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index e69f4701a076..1104515d5ad2 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -241,6 +241,31 @@ static void toggle_nb_mca_mst_cpu(u16 nid) __func__, PCI_FUNC(F3->devfn), NBCFG); } +static void prepare_msrs(void *info) +{ + struct mce i_mce = *(struct mce *)info; + u8 b = i_mce.bank; + + wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus); + + if (boot_cpu_has(X86_FEATURE_SMCA)) { + if (i_mce.inject_flags == DFR_INT_INJ) { + wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status); + wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr); + } else { + wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status); + wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr); + } + + wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc); + } else { + wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status); + wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr); + wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc); + } + +} + static void do_inject(void) { u64 mcg_status = 0; @@ -287,36 +312,9 @@ static void do_inject(void) toggle_hw_mce_inject(cpu, true); - wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS, - (u32)mcg_status, (u32)(mcg_status >> 32)); - - if (boot_cpu_has(X86_FEATURE_SMCA)) { - if (inj_type == DFR_INT_INJ) { - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b), - (u32)i_mce.status, (u32)(i_mce.status >> 32)); - - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - } else { - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b), - (u32)i_mce.status, (u32)(i_mce.status >> 32)); - - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - } - - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b), - (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); - } else { - wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b), - (u32)i_mce.status, (u32)(i_mce.status >> 32)); - - wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - - wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b), - (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); - } + i_mce.mcgstatus = mcg_status; + i_mce.inject_flags = inj_type; + smp_call_function_single(cpu, prepare_msrs, &i_mce, 0); toggle_hw_mce_inject(cpu, false); diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 0b7a63d98440..705e3fffb4a1 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -8,6 +8,9 @@ struct real_mode_header *real_mode_header; u32 *trampoline_cr4_features; +/* Hold the pgd entry used on booting additional CPUs */ +pgd_t trampoline_pgd_entry; + void __init reserve_real_mode(void) { phys_addr_t mem; @@ -84,7 +87,7 @@ void __init setup_real_mode(void) *trampoline_cr4_features = __read_cr4(); trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); - trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; + trampoline_pgd[0] = trampoline_pgd_entry.pgd; trampoline_pgd[511] = init_level4_pgt[511].pgd; #endif } diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 093a892026f9..a3d2c62fd805 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -72,12 +72,14 @@ BEGIN { lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 - # All 
opcodes starting with lower-case 'v' or with (v1) superscript + # All opcodes starting with lower-case 'v', 'k' or with (v1) superscript # accepts VEX prefix - vexok_opcode_expr = "^v.*" + vexok_opcode_expr = "^[vk].*" vexok_expr = "\\(v1\\)" # All opcodes with (v) superscript supports *only* VEX prefix vexonly_expr = "\\(v\\)" + # All opcodes with (ev) superscript supports *only* EVEX prefix + evexonly_expr = "\\(ev\\)" prefix_expr = "\\(Prefix\\)" prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" @@ -95,6 +97,7 @@ BEGIN { prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" + prefix_num["EVEX"] = "INAT_PFX_EVEX" clear_vars() } @@ -319,7 +322,9 @@ function convert_operands(count,opnd, i,j,imm,mod) flags = add_flags(flags, "INAT_MODRM") # check VEX codes - if (match(ext, vexonly_expr)) + if (match(ext, evexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY") + else if (match(ext, vexonly_expr)) flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) flags = add_flags(flags, "INAT_VEXOK") diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index db52a7fafcc2..44c88ad1841a 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -177,7 +177,6 @@ static struct apic xen_pv_apic = { .get_apic_id = xen_get_apic_id, .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */ - .apic_id_mask = 0xFF << 24, /* Used by verify_local_APIC. Match with what xen_get_apic_id does. */ .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index be14cc3e48d5..3be012115853 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -20,10 +20,121 @@ #include #include +#include #include +#include #include #include +#include + +static efi_char16_t vendor[100] __initdata; + +static efi_system_table_t efi_systab_xen __initdata = { + .hdr = { + .signature = EFI_SYSTEM_TABLE_SIGNATURE, + .revision = 0, /* Initialized later. */ + .headersize = 0, /* Ignored by Linux Kernel. */ + .crc32 = 0, /* Ignored by Linux Kernel. */ + .reserved = 0 + }, + .fw_vendor = EFI_INVALID_TABLE_ADDR, /* Initialized later. */ + .fw_revision = 0, /* Initialized later. */ + .con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_out = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .stderr = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .runtime = (efi_runtime_services_t *)EFI_INVALID_TABLE_ADDR, + /* Not used under Xen. */ + .boottime = (efi_boot_services_t *)EFI_INVALID_TABLE_ADDR, + /* Not used under Xen. */ + .nr_tables = 0, /* Initialized later. */ + .tables = EFI_INVALID_TABLE_ADDR /* Initialized later. */ +}; + +static const struct efi efi_xen __initconst = { + .systab = NULL, /* Initialized later. */ + .runtime_version = 0, /* Initialized later. 
*/ + .mps = EFI_INVALID_TABLE_ADDR, + .acpi = EFI_INVALID_TABLE_ADDR, + .acpi20 = EFI_INVALID_TABLE_ADDR, + .smbios = EFI_INVALID_TABLE_ADDR, + .smbios3 = EFI_INVALID_TABLE_ADDR, + .sal_systab = EFI_INVALID_TABLE_ADDR, + .boot_info = EFI_INVALID_TABLE_ADDR, + .hcdp = EFI_INVALID_TABLE_ADDR, + .uga = EFI_INVALID_TABLE_ADDR, + .uv_systab = EFI_INVALID_TABLE_ADDR, + .fw_vendor = EFI_INVALID_TABLE_ADDR, + .runtime = EFI_INVALID_TABLE_ADDR, + .config_table = EFI_INVALID_TABLE_ADDR, + .get_time = xen_efi_get_time, + .set_time = xen_efi_set_time, + .get_wakeup_time = xen_efi_get_wakeup_time, + .set_wakeup_time = xen_efi_set_wakeup_time, + .get_variable = xen_efi_get_variable, + .get_next_variable = xen_efi_get_next_variable, + .set_variable = xen_efi_set_variable, + .query_variable_info = xen_efi_query_variable_info, + .update_capsule = xen_efi_update_capsule, + .query_capsule_caps = xen_efi_query_capsule_caps, + .get_next_high_mono_count = xen_efi_get_next_high_mono_count, + .reset_system = NULL, /* Functionality provided by Xen. */ + .set_virtual_address_map = NULL, /* Not used under Xen. */ + .flags = 0 /* Initialized later. */ +}; + +static efi_system_table_t __init *xen_efi_probe(void) +{ + struct xen_platform_op op = { + .cmd = XENPF_firmware_info, + .u.firmware_info = { + .type = XEN_FW_EFI_INFO, + .index = XEN_FW_EFI_CONFIG_TABLE + } + }; + union xenpf_efi_info *info = &op.u.firmware_info.u.efi_info; + + if (!xen_initial_domain() || HYPERVISOR_platform_op(&op) < 0) + return NULL; + + /* Here we know that Xen runs on EFI platform. */ + + efi = efi_xen; + + efi_systab_xen.tables = info->cfg.addr; + efi_systab_xen.nr_tables = info->cfg.nent; + + op.cmd = XENPF_firmware_info; + op.u.firmware_info.type = XEN_FW_EFI_INFO; + op.u.firmware_info.index = XEN_FW_EFI_VENDOR; + info->vendor.bufsz = sizeof(vendor); + set_xen_guest_handle(info->vendor.name, vendor); + + if (HYPERVISOR_platform_op(&op) == 0) { + efi_systab_xen.fw_vendor = __pa_symbol(vendor); + efi_systab_xen.fw_revision = info->vendor.revision; + } else + efi_systab_xen.fw_vendor = __pa_symbol(L"UNKNOWN"); + + op.cmd = XENPF_firmware_info; + op.u.firmware_info.type = XEN_FW_EFI_INFO; + op.u.firmware_info.index = XEN_FW_EFI_VERSION; + + if (HYPERVISOR_platform_op(&op) == 0) + efi_systab_xen.hdr.revision = info->version; + + op.cmd = XENPF_firmware_info; + op.u.firmware_info.type = XEN_FW_EFI_INFO; + op.u.firmware_info.index = XEN_FW_EFI_RT_VERSION; + + if (HYPERVISOR_platform_op(&op) == 0) + efi.runtime_version = info->version; + + return &efi_systab_xen; +} void __init xen_efi_init(void) { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 760789ae8562..69b4b6d29738 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,10 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); */ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +/* Linux <-> Xen vCPU id mapping */ +DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); + enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); @@ -179,7 +184,7 @@ static void clamp_max_cpus(void) #endif } -static void xen_vcpu_setup(int cpu) +void xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; int err; @@ -202,8 +207,9 @@ static void xen_vcpu_setup(int cpu) if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) return; } - if (cpu < MAX_VIRT_CPUS) - per_cpu(xen_vcpu,cpu) = 
&HYPERVISOR_shared_info->vcpu_info[cpu]; + if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) + per_cpu(xen_vcpu, cpu) = + &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; if (!have_vcpu_info_placement) { if (cpu >= MAX_VIRT_CPUS) @@ -223,7 +229,8 @@ static void xen_vcpu_setup(int cpu) hypervisor has no unregister variant and this hypercall does not allow to over-write info.mfn and info.offset. */ - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), + &info); if (err) { printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); @@ -247,10 +254,11 @@ void xen_vcpu_restore(void) for_each_possible_cpu(cpu) { bool other_cpu = (cpu != smp_processor_id()); - bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL); + bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), + NULL); if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) BUG(); xen_setup_runstate_info(cpu); @@ -259,7 +267,7 @@ void xen_vcpu_restore(void) xen_vcpu_setup(cpu); if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) BUG(); } } @@ -521,9 +529,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) preempt_disable(); - pagefault_disable(); /* Avoid warnings due to being atomic. */ - __get_user(dummy, (unsigned char __user __force *)v); - pagefault_enable(); + probe_kernel_read(&dummy, v, 1); if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); @@ -590,7 +596,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE); unsigned long frames[pages]; int f; @@ -639,7 +645,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE); unsigned long frames[pages]; int f; @@ -1137,8 +1143,11 @@ void xen_setup_vcpu_info_placement(void) { int cpu; - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { + /* Set up direct vCPU id mapping for PV guests. */ + per_cpu(xen_vcpu_id, cpu) = cpu; xen_vcpu_setup(cpu); + } /* xen_vcpu_setup managed to place the vcpu_info within the * percpu area for all cpus, so make use of it. Note that for @@ -1729,6 +1738,9 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif xen_raw_console_write("about to get started...\n"); + /* Let's presume PV guests always boot on vCPU with id 0. */ + per_cpu(xen_vcpu_id, 0) = 0; + xen_setup_runstate_info(0); xen_efi_init(); @@ -1770,9 +1782,10 @@ void __ref xen_hvm_init_shared_info(void) * in that case multiple vcpus might be online. */ for_each_online_cpu(cpu) { /* Leave it to be NULL. 
*/ - if (cpu >= MAX_VIRT_CPUS) + if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS) continue; - per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + per_cpu(xen_vcpu, cpu) = + &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; } } @@ -1797,6 +1810,12 @@ static void __init init_hvm_pv_info(void) xen_setup_features(); + cpuid(base + 4, &eax, &ebx, &ecx, &edx); + if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT) + this_cpu_write(xen_vcpu_id, ebx); + else + this_cpu_write(xen_vcpu_id, smp_processor_id()); + pv_info.name = "Xen HVM"; xen_domain_type = XEN_HVM_DOMAIN; @@ -1808,6 +1827,10 @@ static int xen_hvm_cpu_notify(struct notifier_block *self, unsigned long action, int cpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: + if (cpu_acpi_id(cpu) != U32_MAX) + per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); + else + per_cpu(xen_vcpu_id, cpu) = cpu; xen_vcpu_setup(cpu); if (xen_have_vector_callback) { if (xen_feature(XENFEAT_hvm_safe_pvclock)) diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index e079500b17f3..de4144c24f1c 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -111,63 +111,18 @@ int arch_gnttab_init(unsigned long nr_shared) } #ifdef CONFIG_XEN_PVH -#include #include -#include -static int __init xlated_setup_gnttab_pages(void) -{ - struct page **pages; - xen_pfn_t *pfns; - void *vaddr; - int rc; - unsigned int i; - unsigned long nr_grant_frames = gnttab_max_grant_frames(); - - BUG_ON(nr_grant_frames == 0); - pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL); - if (!pages) - return -ENOMEM; - - pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL); - if (!pfns) { - kfree(pages); - return -ENOMEM; - } - rc = alloc_xenballooned_pages(nr_grant_frames, pages); - if (rc) { - pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, - nr_grant_frames, rc); - kfree(pages); - kfree(pfns); - return rc; - } - for (i = 0; i < nr_grant_frames; i++) - pfns[i] = page_to_pfn(pages[i]); - - vaddr = vmap(pages, nr_grant_frames, 0, PAGE_KERNEL); - if (!vaddr) { - pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, - nr_grant_frames, rc); - free_xenballooned_pages(nr_grant_frames, pages); - kfree(pages); - kfree(pfns); - return -ENOMEM; - } - kfree(pages); - - xen_auto_xlat_grant_frames.pfn = pfns; - xen_auto_xlat_grant_frames.count = nr_grant_frames; - xen_auto_xlat_grant_frames.vaddr = vaddr; - - return 0; -} - +#include static int __init xen_pvh_gnttab_setup(void) { if (!xen_pvh_domain()) return -ENODEV; - return xlated_setup_gnttab_pages(); + xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); + + return xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, + &xen_auto_xlat_grant_frames.vaddr, + xen_auto_xlat_grant_frames.count); } /* Call it _before_ __gnttab_init as we need to initialize the * xen_auto_xlat_grant_frames first. 
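 *
 * xen_xlate_map_ballooned_pages() now does what the removed
 * xlated_setup_gnttab_pages() open-coded: balloon out the frames,
 * record their PFNs and vmap() them, roughly
 *
 *	rc = xen_xlate_map_ballooned_pages(&pfns, &vaddr, nr_frames);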
*/ diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index a1207cb6472a..33e92955e09d 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -109,7 +109,8 @@ static void xen_safe_halt(void) static void xen_halt(void) { if (irqs_disabled()) - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, + xen_vcpu_nr(smp_processor_id()), NULL); else xen_safe_halt(); } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 478a2de543a5..67433714b791 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1113,7 +1113,7 @@ static void __init xen_cleanhighmap(unsigned long vaddr, /* NOTE: The loop is more greedy than the cleanup_highmap variant. * We include the PMD passed in on _both_ boundaries. */ - for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE)); + for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD)); pmd++, vaddr += PMD_SIZE) { if (pmd_none(*pmd)) continue; @@ -1551,41 +1551,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } -#ifdef CONFIG_X86_32 -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) -{ - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); - - return pte; -} -#else /* CONFIG_X86_64 */ -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) -{ - unsigned long pfn; - - if (xen_feature(XENFEAT_writable_page_tables) || - xen_feature(XENFEAT_auto_translated_physmap) || - xen_start_info->mfn_list >= __START_KERNEL_map) - return pte; - - /* - * Pages belonging to the initial p2m list mapped outside the default - * address range must be mapped read-only. This region contains the - * page tables for mapping the p2m list, too, and page tables MUST be - * mapped read-only. - */ - pfn = pte_pfn(pte); - if (pfn >= xen_start_info->first_p2m_pfn && - pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) - pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW); - - return pte; -} -#endif /* CONFIG_X86_64 */ - /* * Init-time set_pte while constructing initial pagetables, which * doesn't allow RO page table pages to be remapped RW. @@ -1600,13 +1565,37 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) * so always write the PTE directly and rely on Xen trapping and * emulating any updates as necessary. */ -static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) +__visible pte_t xen_make_pte_init(pteval_t pte) { - if (pte_mfn(pte) != INVALID_P2M_ENTRY) - pte = mask_rw_pte(ptep, pte); - else - pte = __pte_ma(0); +#ifdef CONFIG_X86_64 + unsigned long pfn; + + /* + * Pages belonging to the initial p2m list mapped outside the default + * address range must be mapped read-only. This region contains the + * page tables for mapping the p2m list, too, and page tables MUST be + * mapped read-only. 
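+ *
+ * In effect (a sketch of the outcome): any PTE whose pfn falls in
+ * [first_p2m_pfn, first_p2m_pfn + nr_p2m_frames) comes back with
+ * _PAGE_RW cleared, and the check only applies when mfn_list lies
+ * below __START_KERNEL_map, i.e. when the p2m list sits outside the
+ * kernel text mapping.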
+ */ + pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT; + if (xen_start_info->mfn_list < __START_KERNEL_map && + pfn >= xen_start_info->first_p2m_pfn && + pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) + pte &= ~_PAGE_RW; +#endif + pte = pte_pfn_to_mfn(pte); + return native_make_pte(pte); +} +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init); +static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) +{ +#ifdef CONFIG_X86_32 + /* If there's an existing pte, then don't allow _PAGE_RW to be set */ + if (pte_mfn(pte) != INVALID_P2M_ENTRY + && pte_val_ma(*ptep) & _PAGE_PRESENT) + pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & + pte_val_ma(pte)); +#endif native_set_pte(ptep, pte); } @@ -2407,6 +2396,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.alloc_pud = xen_alloc_pud; pv_mmu_ops.release_pud = xen_release_pud; #endif + pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte); #ifdef CONFIG_X86_64 pv_mmu_ops.write_cr3 = &xen_write_cr3; @@ -2455,7 +2445,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .pte_val = PV_CALLEE_SAVE(xen_pte_val), .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), - .make_pte = PV_CALLEE_SAVE(xen_make_pte), + .make_pte = PV_CALLEE_SAVE(xen_make_pte_init), .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), #ifdef CONFIG_X86_PAE diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index cab9f766bb06..dd2a49a8aacc 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -182,7 +182,7 @@ static void * __ref alloc_p2m_page(void) if (unlikely(!slab_is_available())) return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); - return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); + return (void *)__get_free_page(GFP_KERNEL); } static void __ref free_p2m_page(void *p) diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 9466354d3e49..32bdc2c90297 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -547,7 +547,7 @@ void xen_pmu_init(int cpu) return; fail: - pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n", + pr_info_once("Could not initialize VPMU for cpu %d, error %d\n", cpu, err); free_pages((unsigned long)xenpmu_data, 0); } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 719cf291dcdf..0b4d04c8ab4d 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -322,6 +322,13 @@ static void __init xen_smp_prepare_boot_cpu(void) xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); } + + /* + * Setup vcpu_info for boot CPU. + */ + if (xen_hvm_domain()) + xen_vcpu_setup(0); + /* * The alternative logic (which patches the unlock/lock) runs before * the smp bootup up code is activated. 
Hence we need to set this up @@ -454,7 +461,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) #endif ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); - if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) + if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) BUG(); kfree(ctxt); @@ -492,7 +499,7 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) if (rc) return rc; - rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL); BUG_ON(rc); while (cpu_report_state(cpu) != CPU_ONLINE) @@ -520,7 +527,8 @@ static int xen_cpu_disable(void) static void xen_cpu_die(unsigned int cpu) { - while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { + while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, + xen_vcpu_nr(cpu), NULL)) { __set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(HZ/10); } @@ -536,7 +544,7 @@ static void xen_cpu_die(unsigned int cpu) static void xen_play_dead(void) /* used only with HOTPLUG_CPU */ { play_dead_common(); - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL); cpu_bringup(); /* * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down) @@ -576,7 +584,7 @@ static void stop_self(void *v) set_cpu_online(cpu, false); - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL); BUG(); } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 6deba5bc7e34..67356d29d74d 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -11,8 +11,6 @@ #include #include #include -#include -#include #include #include #include @@ -31,44 +29,6 @@ /* Xen may fire a timer up to this many ns early */ #define TIMER_SLOP 100000 -#define NS_PER_TICK (1000000000LL / HZ) - -/* snapshots of runstate info */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); - -/* unused ns of stolen time */ -static DEFINE_PER_CPU(u64, xen_residual_stolen); - -static void do_stolen_accounting(void) -{ - struct vcpu_runstate_info state; - struct vcpu_runstate_info *snap; - s64 runnable, offline, stolen; - cputime_t ticks; - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - snap = this_cpu_ptr(&xen_runstate_snapshot); - - /* work out how much time the VCPU has not been runn*ing* */ - runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; - offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; - - *snap = state; - - /* Add the appropriate number of ticks of stolen time, - including any left-overs from last time. 
*/ - stolen = runnable + offline + __this_cpu_read(xen_residual_stolen); - - if (stolen < 0) - stolen = 0; - - ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); - __this_cpu_write(xen_residual_stolen, stolen); - account_steal_ticks(ticks); -} /* Get the TSC speed from Xen */ static unsigned long xen_tsc_khz(void) @@ -263,8 +223,10 @@ static int xen_vcpuop_shutdown(struct clock_event_device *evt) { int cpu = smp_processor_id(); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) || - HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu), + NULL) || + HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + NULL)) BUG(); return 0; @@ -274,7 +236,8 @@ static int xen_vcpuop_set_oneshot(struct clock_event_device *evt) { int cpu = smp_processor_id(); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + NULL)) BUG(); return 0; @@ -293,7 +256,8 @@ static int xen_vcpuop_set_next_event(unsigned long delta, /* Get an event anyway, even if the timeout is already expired */ single.flags = 0; - ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single); + ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu), + &single); BUG_ON(ret != 0); return ret; @@ -335,8 +299,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) ret = IRQ_HANDLED; } - do_stolen_accounting(); - return ret; } @@ -394,13 +356,15 @@ void xen_timer_resume(void) return; for_each_online_cpu(cpu) { - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, + xen_vcpu_nr(cpu), NULL)) BUG(); } } static const struct pv_time_ops xen_time_ops __initconst = { .sched_clock = xen_clocksource_read, + .steal_clock = xen_steal_clock, }; static void __init xen_time_init(void) @@ -414,7 +378,8 @@ static void __init xen_time_init(void) clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + NULL) == 0) { /* Successfully turned off 100Hz tick, so we have the vcpuop-based timer interface */ printk(KERN_DEBUG "Xen: using vcpuop timer interface\n"); @@ -431,6 +396,8 @@ static void __init xen_time_init(void) xen_setup_timer(cpu); xen_setup_cpu_clockevents(); + xen_time_setup_guest(); + if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 4140b070f2e9..3cbce3b085e7 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -76,6 +76,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); bool xen_vcpu_stolen(int vcpu); +void xen_vcpu_setup(int cpu); void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index fd8017ce298a..e7a23f2a519a 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -98,6 +98,26 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return result; \ } +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + __asm__ __volatile__( \ + "1: l32i %1, %3, 0\n" \ + " wsr %1, scompare1\n" \ + " " #op " %0, %1, %2\n" \ + " s32c1i %0, %3, 0\n" \ + " bne %0, %1, 1b\n" \ + : "=&a" (result), "=&a" 
(tmp) \ + : "a" (i), "a" (v) \ + : "memory" \ + ); \ + \ + return result; \ +} + #else /* XCHAL_HAVE_S32C1I */ #define ATOMIC_OP(op) \ @@ -138,18 +158,42 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return vval; \ } +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned int tmp, vval; \ + \ + __asm__ __volatile__( \ + " rsil a15,"__stringify(TOPLEVEL)"\n" \ + " l32i %0, %3, 0\n" \ + " " #op " %1, %0, %2\n" \ + " s32i %1, %3, 0\n" \ + " wsr a15, ps\n" \ + " rsync\n" \ + : "=&a" (vval), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "a15", "memory" \ + ); \ + \ + return vval; \ +} + #endif /* XCHAL_HAVE_S32C1I */ -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index d38eb9237e64..1065bc8bcae5 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -44,7 +44,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pte_t *ptep; int i; - ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + ptep = (pte_t *)__get_free_page(GFP_KERNEL); if (!ptep) return NULL; for (i = 0; i < 1024; i++) diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h index 1d95fa5dcd10..a36221cf6363 100644 --- a/arch/xtensa/include/asm/spinlock.h +++ b/arch/xtensa/include/asm/spinlock.h @@ -11,6 +11,9 @@ #ifndef _XTENSA_SPINLOCK_H #define _XTENSA_SPINLOCK_H +#include +#include + /* * spinlock * @@ -29,8 +32,11 @@ */ #define arch_spin_is_locked(x) ((x)->slock != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 7f4a1fdb1502..2725e08ef353 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -110,7 +110,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
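 *
 * Note the signature change in the hunk below: handle_mm_fault() now
 * takes only the VMA and derives the mm itself, e.g.:
 *
 *	fault = handle_mm_fault(vma, address, flags);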
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/xtensa/platforms/xt2000/setup.c b/arch/xtensa/platforms/xt2000/setup.c index 5f4bd71971d6..4904c5c16918 100644 --- a/arch/xtensa/platforms/xt2000/setup.c +++ b/arch/xtensa/platforms/xt2000/setup.c @@ -113,7 +113,6 @@ void platform_heartbeat(void) } //#define RS_TABLE_SIZE 2 -//#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF|UPF_SKIP_TEST) #define _SERIAL_PORT(_base,_irq) \ { \ diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 711e4d8de6fa..f70cc3bdfd01 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -26,6 +26,7 @@ #include #include #include +#include "blk.h" #define BIP_INLINE_VECS 4 @@ -53,7 +54,6 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, { struct bio_integrity_payload *bip; struct bio_set *bs = bio->bi_pool; - unsigned long idx = BIO_POOL_NONE; unsigned inline_vecs; if (!bs || !bs->bio_integrity_pool) { @@ -71,17 +71,19 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, memset(bip, 0, sizeof(*bip)); if (nr_vecs > inline_vecs) { + unsigned long idx = 0; + bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx, bs->bvec_integrity_pool); if (!bip->bip_vec) goto err; bip->bip_max_vcnt = bvec_nr_vecs(idx); + bip->bip_slab = idx; } else { bip->bip_vec = bip->bip_inline_vecs; bip->bip_max_vcnt = inline_vecs; } - bip->bip_slab = idx; bip->bip_bio = bio; bio->bi_integrity = bip; bio->bi_rw |= REQ_INTEGRITY; @@ -110,9 +112,7 @@ void bio_integrity_free(struct bio *bio) bip->bip_vec->bv_offset); if (bs && bs->bio_integrity_pool) { - if (bip->bip_slab != BIO_POOL_NONE) - bvec_free(bs->bvec_integrity_pool, bip->bip_vec, - bip->bip_slab); + bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab); mempool_free(bip, bs->bio_integrity_pool); } else { diff --git a/block/bio.c b/block/bio.c index 0e4aa42bc30d..54ee3846c3a5 100644 --- a/block/bio.c +++ b/block/bio.c @@ -43,7 +43,7 @@ * unsigned short */ #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } -static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { +static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = { BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), }; #undef BV @@ -160,11 +160,15 @@ unsigned int bvec_nr_vecs(unsigned short idx) void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) { - BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); + if (!idx) + return; + idx--; + + BIO_BUG_ON(idx >= BVEC_POOL_NR); - if (idx == BIOVEC_MAX_IDX) + if (idx == BVEC_POOL_MAX) { mempool_free(bv, pool); - else { + } else { struct biovec_slab *bvs = bvec_slabs + idx; kmem_cache_free(bvs->slab, bv); @@ -206,7 +210,7 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, * idx now points to the pool we want to allocate from. only the * 1-vec entry pool is mempool backed. 
*/ - if (*idx == BIOVEC_MAX_IDX) { + if (*idx == BVEC_POOL_MAX) { fallback: bvl = mempool_alloc(pool, gfp_mask); } else { @@ -226,11 +230,12 @@ fallback: */ bvl = kmem_cache_alloc(bvs->slab, __gfp_mask); if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) { - *idx = BIOVEC_MAX_IDX; + *idx = BVEC_POOL_MAX; goto fallback; } } + (*idx)++; return bvl; } @@ -250,8 +255,7 @@ static void bio_free(struct bio *bio) __bio_free(bio); if (bs) { - if (bio_flagged(bio, BIO_OWNS_VEC)) - bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); + bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio)); /* * If we have front padding, adjust the bio pointer before freeing @@ -420,7 +424,6 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) gfp_t saved_gfp = gfp_mask; unsigned front_pad; unsigned inline_vecs; - unsigned long idx = BIO_POOL_NONE; struct bio_vec *bvl = NULL; struct bio *bio; void *p; @@ -480,6 +483,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bio_init(bio); if (nr_iovecs > inline_vecs) { + unsigned long idx = 0; + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); if (!bvl && gfp_mask != saved_gfp) { punt_bios_to_rescuer(bs); @@ -490,13 +495,12 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (unlikely(!bvl)) goto err_free; - bio_set_flag(bio, BIO_OWNS_VEC); + bio->bi_flags |= idx << BVEC_POOL_OFFSET; } else if (nr_iovecs) { bvl = bio->bi_inline_vecs; } bio->bi_pool = bs; - bio->bi_flags |= idx << BIO_POOL_OFFSET; bio->bi_max_vecs = nr_iovecs; bio->bi_io_vec = bvl; return bio; @@ -568,7 +572,7 @@ EXPORT_SYMBOL(bio_phys_segments); */ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) { - BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE); + BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio)); /* * most users will be overriding ->bi_bdev with a new target, @@ -656,16 +660,15 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs); if (!bio) return NULL; - bio->bi_bdev = bio_src->bi_bdev; bio->bi_rw = bio_src->bi_rw; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; - if (bio->bi_rw & REQ_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD) goto integrity_clone; - if (bio->bi_rw & REQ_WRITE_SAME) { + if (bio_op(bio) == REQ_OP_WRITE_SAME) { bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0]; goto integrity_clone; } @@ -854,21 +857,20 @@ static void submit_bio_wait_endio(struct bio *bio) /** * submit_bio_wait - submit a bio, and wait until it completes - * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) * @bio: The &struct bio which describes the I/O * * Simple wrapper around submit_bio(). Returns 0 on success, or the error from * bio_endio() on failure. 
*/ -int submit_bio_wait(int rw, struct bio *bio) +int submit_bio_wait(struct bio *bio) { struct submit_bio_ret ret; - rw |= REQ_SYNC; init_completion(&ret.event); bio->bi_private = &ret; bio->bi_end_io = submit_bio_wait_endio; - submit_bio(rw, bio); + bio->bi_rw |= REQ_SYNC; + submit_bio(bio); wait_for_completion_io(&ret.event); return ret.error; @@ -1099,7 +1101,6 @@ int bio_uncopy_user(struct bio *bio) bio_put(bio); return ret; } -EXPORT_SYMBOL(bio_uncopy_user); /** * bio_copy_user_iov - copy user data to bio @@ -1167,7 +1168,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, goto out_bmd; if (iter->type & WRITE) - bio->bi_rw |= REQ_WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); ret = 0; @@ -1337,7 +1338,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, * set data direction, and check if mapped pages need bouncing */ if (iter->type & WRITE) - bio->bi_rw |= REQ_WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio_set_flag(bio, BIO_USER_MAPPED); @@ -1394,7 +1395,6 @@ void bio_unmap_user(struct bio *bio) __bio_unmap_user(bio); bio_put(bio); } -EXPORT_SYMBOL(bio_unmap_user); static void bio_map_kern_endio(struct bio *bio) { @@ -1530,7 +1530,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, bio->bi_private = data; } else { bio->bi_end_io = bio_copy_kern_endio; - bio->bi_rw |= REQ_WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); } return bio; @@ -1540,7 +1540,6 @@ cleanup: bio_put(bio); return ERR_PTR(-ENOMEM); } -EXPORT_SYMBOL(bio_copy_kern); /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions @@ -1785,7 +1784,7 @@ struct bio *bio_split(struct bio *bio, int sectors, * Discards need a mutable bio_vec to accommodate the payload * required by the DSM TRIM and UNMAP commands. */ - if (bio->bi_rw & REQ_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD) split = bio_clone_bioset(bio, gfp, bs); else split = bio_clone_fast(bio, gfp, bs); @@ -1834,7 +1833,7 @@ EXPORT_SYMBOL_GPL(bio_trim); */ mempool_t *biovec_create_pool(int pool_entries) { - struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; + struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX; return mempool_create_slab_pool(pool_entries, bp->slab); } @@ -2011,7 +2010,7 @@ static void __init biovec_init_slabs(void) { int i; - for (i = 0; i < BIOVEC_NR_POOLS; i++) { + for (i = 0; i < BVEC_POOL_NR; i++) { int size; struct biovec_slab *bvs = bvec_slabs + i; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 66e6f1aae02e..dd38e5ced4a3 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -905,7 +905,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) return 0; } -struct cftype blkcg_files[] = { +static struct cftype blkcg_files[] = { { .name = "stat", .flags = CFTYPE_NOT_ON_ROOT, @@ -914,7 +914,7 @@ struct cftype blkcg_files[] = { { } /* terminate */ }; -struct cftype blkcg_legacy_files[] = { +static struct cftype blkcg_legacy_files[] = { { .name = "reset_stats", .write_u64 = blkcg_reset_stats, diff --git a/block/blk-core.c b/block/blk-core.c index 2475b1c72773..a687e9cc16c2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -959,10 +959,10 @@ static void __freed_request(struct request_list *rl, int sync) * A request has just been released. Account for it, update the full and * congestion status, wake up any waiters. Called under q->queue_lock. 
*/ -static void freed_request(struct request_list *rl, unsigned int flags) +static void freed_request(struct request_list *rl, int op, unsigned int flags) { struct request_queue *q = rl->q; - int sync = rw_is_sync(flags); + int sync = rw_is_sync(op, flags); q->nr_rqs[sync]--; rl->count[sync]--; @@ -1029,7 +1029,7 @@ static bool blk_rq_should_init_elevator(struct bio *bio) * Flush requests do not use the elevator so skip initialization. * This allows a request to share the flush and elevator data. */ - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) + if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) return false; return true; @@ -1054,7 +1054,8 @@ static struct io_context *rq_ioc(struct bio *bio) /** * __get_request - get a free request * @rl: request list to allocate from - * @rw_flags: RW and SYNC flags + * @op: REQ_OP_READ/REQ_OP_WRITE + * @op_flags: rq_flag_bits * @bio: bio to allocate request for (can be %NULL) * @gfp_mask: allocation mask * @@ -1065,21 +1066,22 @@ static struct io_context *rq_ioc(struct bio *bio) * Returns ERR_PTR on failure, with @q->queue_lock held. * Returns request pointer on success, with @q->queue_lock *not held*. */ -static struct request *__get_request(struct request_list *rl, int rw_flags, - struct bio *bio, gfp_t gfp_mask) +static struct request *__get_request(struct request_list *rl, int op, + int op_flags, struct bio *bio, + gfp_t gfp_mask) { struct request_queue *q = rl->q; struct request *rq; struct elevator_type *et = q->elevator->type; struct io_context *ioc = rq_ioc(bio); struct io_cq *icq = NULL; - const bool is_sync = rw_is_sync(rw_flags) != 0; + const bool is_sync = rw_is_sync(op, op_flags) != 0; int may_queue; if (unlikely(blk_queue_dying(q))) return ERR_PTR(-ENODEV); - may_queue = elv_may_queue(q, rw_flags); + may_queue = elv_may_queue(q, op, op_flags); if (may_queue == ELV_MQUEUE_NO) goto rq_starved; @@ -1123,7 +1125,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, /* * Decide whether the new request will be managed by elevator. If - * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will + * so, mark @op_flags and increment elvpriv. Non-zero elvpriv will * prevent the current elevator from being destroyed until the new * request is freed. This guarantees icq's won't be destroyed and * makes creating new ones safe. @@ -1132,14 +1134,14 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, * it will be created after releasing queue_lock. */ if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { - rw_flags |= REQ_ELVPRIV; + op_flags |= REQ_ELVPRIV; q->nr_rqs_elvpriv++; if (et->icq_cache && ioc) icq = ioc_lookup_icq(ioc, q); } if (blk_queue_io_stat(q)) - rw_flags |= REQ_IO_STAT; + op_flags |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); /* allocate and init request */ @@ -1149,10 +1151,10 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, blk_rq_init(q, rq); blk_rq_set_rl(rq, rl); - rq->cmd_flags = rw_flags | REQ_ALLOCED; + req_set_op_attrs(rq, op, op_flags | REQ_ALLOCED); /* init elvpriv */ - if (rw_flags & REQ_ELVPRIV) { + if (op_flags & REQ_ELVPRIV) { if (unlikely(et->icq_cache && !icq)) { if (ioc) icq = ioc_create_icq(ioc, q, gfp_mask); @@ -1178,7 +1180,7 @@ out: if (ioc_batching(q, ioc)) ioc->nr_batch_requests--; - trace_block_getrq(q, bio, rw_flags & 1); + trace_block_getrq(q, bio, op); return rq; fail_elvpriv: @@ -1208,7 +1210,7 @@ fail_alloc: * queue, but this is pretty rare. 
*/ spin_lock_irq(q->queue_lock); - freed_request(rl, rw_flags); + freed_request(rl, op, op_flags); /* * in the very unlikely event that allocation failed and no @@ -1226,7 +1228,8 @@ rq_starved: /** * get_request - get a free request * @q: request_queue to allocate request from - * @rw_flags: RW and SYNC flags + * @op: REQ_OP_READ/REQ_OP_WRITE + * @op_flags: rq_flag_bits * @bio: bio to allocate request for (can be %NULL) * @gfp_mask: allocation mask * @@ -1237,17 +1240,18 @@ rq_starved: * Returns ERR_PTR on failure, with @q->queue_lock held. * Returns request pointer on success, with @q->queue_lock *not held*. */ -static struct request *get_request(struct request_queue *q, int rw_flags, - struct bio *bio, gfp_t gfp_mask) +static struct request *get_request(struct request_queue *q, int op, + int op_flags, struct bio *bio, + gfp_t gfp_mask) { - const bool is_sync = rw_is_sync(rw_flags) != 0; + const bool is_sync = rw_is_sync(op, op_flags) != 0; DEFINE_WAIT(wait); struct request_list *rl; struct request *rq; rl = blk_get_rl(q, bio); /* transferred to @rq on success */ retry: - rq = __get_request(rl, rw_flags, bio, gfp_mask); + rq = __get_request(rl, op, op_flags, bio, gfp_mask); if (!IS_ERR(rq)) return rq; @@ -1260,7 +1264,7 @@ retry: prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, TASK_UNINTERRUPTIBLE); - trace_block_sleeprq(q, bio, rw_flags & 1); + trace_block_sleeprq(q, bio, op); spin_unlock_irq(q->queue_lock); io_schedule(); @@ -1289,11 +1293,16 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, create_io_context(gfp_mask, q->node); spin_lock_irq(q->queue_lock); - rq = get_request(q, rw, NULL, gfp_mask); - if (IS_ERR(rq)) + rq = get_request(q, rw, 0, NULL, gfp_mask); + if (IS_ERR(rq)) { spin_unlock_irq(q->queue_lock); - /* q->queue_lock is unlocked at this point */ + return rq; + } + /* q->queue_lock is unlocked at this point */ + rq->__data_len = 0; + rq->__sector = (sector_t) -1; + rq->bio = rq->biotail = NULL; return rq; } @@ -1308,63 +1317,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) } EXPORT_SYMBOL(blk_get_request); -/** - * blk_make_request - given a bio, allocate a corresponding struct request. - * @q: target request queue - * @bio: The bio describing the memory mappings that will be submitted for IO. - * It may be a chained-bio properly constructed by block/bio layer. - * @gfp_mask: gfp flags to be used for memory allocation - * - * blk_make_request is the parallel of generic_make_request for BLOCK_PC - * type commands. Where the struct request needs to be farther initialized by - * the caller. It is passed a &struct bio, which describes the memory info of - * the I/O transfer. - * - * The caller of blk_make_request must make sure that bi_io_vec - * are set to describe the memory buffers. That bio_data_dir() will return - * the needed direction of the request. (And all bio's in the passed bio-chain - * are properly set accordingly) - * - * If called under none-sleepable conditions, mapped bio buffers must not - * need bouncing, by calling the appropriate masked or flagged allocator, - * suitable for the target device. Otherwise the call to blk_queue_bounce will - * BUG. - * - * WARNING: When allocating/cloning a bio-chain, careful consideration should be - * given to how you allocate bios. In particular, you cannot use - * __GFP_DIRECT_RECLAIM for anything but the first bio in the chain. 
Otherwise - * you risk waiting for IO completion of a bio that hasn't been submitted yet, - * thus resulting in a deadlock. Alternatively bios should be allocated using - * bio_kmalloc() instead of bio_alloc(), as that avoids the mempool deadlock. - * If possible a big IO should be split into smaller parts when allocation - * fails. Partial allocation should not be an error, or you risk a live-lock. - */ -struct request *blk_make_request(struct request_queue *q, struct bio *bio, - gfp_t gfp_mask) -{ - struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); - - if (IS_ERR(rq)) - return rq; - - blk_rq_set_block_pc(rq); - - for_each_bio(bio) { - struct bio *bounce_bio = bio; - int ret; - - blk_queue_bounce(q, &bounce_bio); - ret = blk_rq_append_bio(q, rq, bounce_bio); - if (unlikely(ret)) { - blk_put_request(rq); - return ERR_PTR(ret); - } - } - - return rq; -} -EXPORT_SYMBOL(blk_make_request); - /** * blk_rq_set_block_pc - initialize a request to type BLOCK_PC * @rq: request to be initialized @@ -1373,9 +1325,6 @@ EXPORT_SYMBOL(blk_make_request); void blk_rq_set_block_pc(struct request *rq) { rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->__data_len = 0; - rq->__sector = (sector_t) -1; - rq->bio = rq->biotail = NULL; memset(rq->__cmd, 0, sizeof(rq->__cmd)); } EXPORT_SYMBOL(blk_rq_set_block_pc); @@ -1491,13 +1440,14 @@ void __blk_put_request(struct request_queue *q, struct request *req) */ if (req->cmd_flags & REQ_ALLOCED) { unsigned int flags = req->cmd_flags; + int op = req_op(req); struct request_list *rl = blk_rq_rl(req); BUG_ON(!list_empty(&req->queuelist)); BUG_ON(ELV_ON_HASH(req)); blk_free_request(rl, req); - freed_request(rl, flags); + freed_request(rl, op, flags); blk_put_rl(rl); } } @@ -1712,7 +1662,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) { const bool sync = !!(bio->bi_rw & REQ_SYNC); struct blk_plug *plug; - int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; + int el_ret, rw_flags = 0, where = ELEVATOR_INSERT_SORT; struct request *req; unsigned int request_count = 0; @@ -1731,7 +1681,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { + if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) { spin_lock_irq(q->queue_lock); where = ELEVATOR_INSERT_FLUSH; goto get_rq; @@ -1772,15 +1722,19 @@ get_rq: * but we need to set it earlier to expose the sync flag to the * rq allocator and io schedulers. */ - rw_flags = bio_data_dir(bio); if (sync) rw_flags |= REQ_SYNC; + /* + * Add in META/PRIO flags, if set, before we get to the IO scheduler + */ + rw_flags |= (bio->bi_rw & (REQ_META | REQ_PRIO)); + /* * Grab a free request. This is might sleep but can not fail. * Returns with the queue unlocked. */ - req = get_request(q, rw_flags, bio, GFP_NOIO); + req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO); if (IS_ERR(req)) { bio->bi_error = PTR_ERR(req); bio_endio(bio); @@ -1849,7 +1803,7 @@ static void handle_bad_sector(struct bio *bio) char b[BDEVNAME_SIZE]; printk(KERN_INFO "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", + printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n", bdevname(bio->bi_bdev, b), bio->bi_rw, (unsigned long long)bio_end_sector(bio), @@ -1964,25 +1918,30 @@ generic_make_request_checks(struct bio *bio) * drivers without flush support don't have to worry * about them. 
*/ - if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && + if ((bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) && !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { - bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); + bio->bi_rw &= ~(REQ_PREFLUSH | REQ_FUA); if (!nr_sectors) { err = 0; goto end_io; } } - if ((bio->bi_rw & REQ_DISCARD) && - (!blk_queue_discard(q) || - ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) { - err = -EOPNOTSUPP; - goto end_io; - } - - if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { - err = -EOPNOTSUPP; - goto end_io; + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + if (!blk_queue_discard(q)) + goto not_supported; + break; + case REQ_OP_SECURE_ERASE: + if (!blk_queue_secure_erase(q)) + goto not_supported; + break; + case REQ_OP_WRITE_SAME: + if (!bdev_write_same(bio->bi_bdev)) + goto not_supported; + break; + default: + break; } /* @@ -1999,6 +1958,8 @@ generic_make_request_checks(struct bio *bio) trace_block_bio_queue(q, bio); return true; +not_supported: + err = -EOPNOTSUPP; end_io: bio->bi_error = err; bio_endio(bio); @@ -2094,7 +2055,6 @@ EXPORT_SYMBOL(generic_make_request); /** * submit_bio - submit a bio to the block device layer for I/O - * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) * @bio: The &struct bio which describes the I/O * * submit_bio() is very similar in purpose to generic_make_request(), and @@ -2102,10 +2062,8 @@ EXPORT_SYMBOL(generic_make_request); * interfaces; @bio must be presetup and ready for I/O. * */ -blk_qc_t submit_bio(int rw, struct bio *bio) +blk_qc_t submit_bio(struct bio *bio) { - bio->bi_rw |= rw; - /* * If it's a regular read/write or a barrier with data attached, * go through the normal accounting stuff before submission. @@ -2113,12 +2071,12 @@ blk_qc_t submit_bio(int rw, struct bio *bio) if (bio_has_data(bio)) { unsigned int count; - if (unlikely(rw & REQ_WRITE_SAME)) + if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) count = bdev_logical_block_size(bio->bi_bdev) >> 9; else count = bio_sectors(bio); - if (rw & WRITE) { + if (op_is_write(bio_op(bio))) { count_vm_events(PGPGOUT, count); } else { task_io_account_read(bio->bi_iter.bi_size); @@ -2129,7 +2087,7 @@ blk_qc_t submit_bio(int rw, struct bio *bio) char b[BDEVNAME_SIZE]; printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", current->comm, task_pid_nr(current), - (rw & WRITE) ? "WRITE" : "READ", + op_is_write(bio_op(bio)) ? 
"WRITE" : "READ", (unsigned long long)bio->bi_iter.bi_sector, bdevname(bio->bi_bdev, b), count); @@ -2160,7 +2118,7 @@ EXPORT_SYMBOL(submit_bio); static int blk_cloned_rq_check_limits(struct request_queue *q, struct request *rq) { - if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) { + if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) { printk(KERN_ERR "%s: over max size limit.\n", __func__); return -EIO; } @@ -2216,7 +2174,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) */ BUG_ON(blk_queued_rq(rq)); - if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA)) + if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) where = ELEVATOR_INSERT_FLUSH; add_acct_request(q, rq, where); @@ -2979,8 +2937,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request_err); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) { - /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ - rq->cmd_flags |= bio->bi_rw & REQ_WRITE; + req_set_op(rq, bio_op(bio)); if (bio_has_data(bio)) rq->nr_phys_segments = bio_phys_segments(q, bio); @@ -3065,7 +3022,8 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; + req_set_op_attrs(dst, req_op(src), + (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE); dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); @@ -3310,7 +3268,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) /* * rq is already accounted, so use raw insert */ - if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) + if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); else __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); @@ -3377,6 +3335,7 @@ bool blk_poll(struct request_queue *q, blk_qc_t cookie) return false; } +EXPORT_SYMBOL_GPL(blk_poll); #ifdef CONFIG_PM /** diff --git a/block/blk-exec.c b/block/blk-exec.c index 3fec8a29d0fa..7ea04325d02f 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -62,7 +62,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, /* * don't check dying flag for MQ because the request won't - * be resued after dying flag is set + * be reused after dying flag is set */ if (q->mq_ops) { blk_mq_insert_request(rq, at_head, true, false); diff --git a/block/blk-flush.c b/block/blk-flush.c index b1c91d229e5e..d308def812db 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -10,8 +10,8 @@ * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request * properties and hardware capability. * - * If a request doesn't have data, only REQ_FLUSH makes sense, which - * indicates a simple flush request. If there is data, REQ_FLUSH indicates + * If a request doesn't have data, only REQ_PREFLUSH makes sense, which + * indicates a simple flush request. If there is data, REQ_PREFLUSH indicates * that the device cache should be flushed before the data is executed, and * REQ_FUA means that the data must be on non-volatile media on request * completion. @@ -20,16 +20,16 @@ * difference. The requests are either completed immediately if there's no * data or executed as normal requests otherwise. * - * If the device has writeback cache and supports FUA, REQ_FLUSH is + * If the device has writeback cache and supports FUA, REQ_PREFLUSH is * translated to PREFLUSH but REQ_FUA is passed down directly with DATA. 
* - * If the device has writeback cache and doesn't support FUA, REQ_FLUSH is - * translated to PREFLUSH and REQ_FUA to POSTFLUSH. + * If the device has writeback cache and doesn't support FUA, REQ_PREFLUSH + * is translated to PREFLUSH and REQ_FUA to POSTFLUSH. * * The actual execution of flush is double buffered. Whenever a request * needs to execute PRE or POSTFLUSH, it queues at * fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a - * flush is issued and the pending_idx is toggled. When the flush + * REQ_OP_FLUSH is issued and the pending_idx is toggled. When the flush * completes, all the requests which were pending are proceeded to the next * step. This allows arbitrary merging of different types of FLUSH/FUA * requests. @@ -103,7 +103,7 @@ static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq) policy |= REQ_FSEQ_DATA; if (fflags & (1UL << QUEUE_FLAG_WC)) { - if (rq->cmd_flags & REQ_FLUSH) + if (rq->cmd_flags & REQ_PREFLUSH) policy |= REQ_FSEQ_PREFLUSH; if (!(fflags & (1UL << QUEUE_FLAG_FUA)) && (rq->cmd_flags & REQ_FUA)) @@ -330,7 +330,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) } flush_rq->cmd_type = REQ_TYPE_FS; - flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; + req_set_op_attrs(flush_rq, REQ_OP_FLUSH, WRITE_FLUSH | REQ_FLUSH_SEQ); flush_rq->rq_disk = first_rq->rq_disk; flush_rq->end_io = flush_end_io; @@ -391,9 +391,9 @@ void blk_insert_flush(struct request *rq) /* * @policy now records what operations need to be done. Adjust - * REQ_FLUSH and FUA for the driver. + * REQ_PREFLUSH and FUA for the driver. */ - rq->cmd_flags &= ~REQ_FLUSH; + rq->cmd_flags &= ~REQ_PREFLUSH; if (!(fflags & (1UL << QUEUE_FLAG_FUA))) rq->cmd_flags &= ~REQ_FUA; @@ -485,8 +485,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, bio = bio_alloc(gfp_mask, 0); bio->bi_bdev = bdev; + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); - ret = submit_bio_wait(WRITE_FLUSH, bio); + ret = submit_bio_wait(bio); /* * The driver must store the error location in ->bi_sector, if diff --git a/block/blk-lib.c b/block/blk-lib.c index 9e29dc351695..083e56f72308 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -9,33 +9,46 @@ #include "blk.h" -static struct bio *next_bio(struct bio *bio, int rw, unsigned int nr_pages, +static struct bio *next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp) { struct bio *new = bio_alloc(gfp, nr_pages); if (bio) { bio_chain(bio, new); - submit_bio(rw, bio); + submit_bio(bio); } return new; } int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, int type, struct bio **biop) + sector_t nr_sects, gfp_t gfp_mask, int flags, + struct bio **biop) { struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = *biop; unsigned int granularity; + enum req_op op; int alignment; if (!q) return -ENXIO; - if (!blk_queue_discard(q)) - return -EOPNOTSUPP; - if ((type & REQ_SECURE) && !blk_queue_secdiscard(q)) - return -EOPNOTSUPP; + + if (flags & BLKDEV_DISCARD_SECURE) { + if (flags & BLKDEV_DISCARD_ZERO) + return -EOPNOTSUPP; + if (!blk_queue_secure_erase(q)) + return -EOPNOTSUPP; + op = REQ_OP_SECURE_ERASE; + } else { + if (!blk_queue_discard(q)) + return -EOPNOTSUPP; + if ((flags & BLKDEV_DISCARD_ZERO) && + !q->limits.discard_zeroes_data) + return -EOPNOTSUPP; + op = REQ_OP_DISCARD; + } /* Zero-sector (unknown) and one-sector granularities are the same. 
*/ granularity = max(q->limits.discard_granularity >> 9, 1U); @@ -62,9 +75,10 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, req_sects = end_sect - sector; } - bio = next_bio(bio, type, 1, gfp_mask); + bio = next_bio(bio, 1, gfp_mask); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; + bio_set_op_attrs(bio, op, 0); bio->bi_iter.bi_size = req_sects << 9; nr_sects -= req_sects; @@ -98,20 +112,16 @@ EXPORT_SYMBOL(__blkdev_issue_discard); int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) { - int type = REQ_WRITE | REQ_DISCARD; struct bio *bio = NULL; struct blk_plug plug; int ret; - if (flags & BLKDEV_DISCARD_SECURE) - type |= REQ_SECURE; - blk_start_plug(&plug); - ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, type, + ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags, &bio); if (!ret && bio) { - ret = submit_bio_wait(type, bio); - if (ret == -EOPNOTSUPP) + ret = submit_bio_wait(bio); + if (ret == -EOPNOTSUPP && !(flags & BLKDEV_DISCARD_ZERO)) ret = 0; bio_put(bio); } @@ -148,13 +158,14 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, max_write_same_sectors = UINT_MAX >> 9; while (nr_sects) { - bio = next_bio(bio, REQ_WRITE | REQ_WRITE_SAME, 1, gfp_mask); + bio = next_bio(bio, 1, gfp_mask); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; bio->bi_vcnt = 1; bio->bi_io_vec->bv_page = page; bio->bi_io_vec->bv_offset = 0; bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); + bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0); if (nr_sects > max_write_same_sectors) { bio->bi_iter.bi_size = max_write_same_sectors << 9; @@ -167,10 +178,10 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, } if (bio) { - ret = submit_bio_wait(REQ_WRITE | REQ_WRITE_SAME, bio); + ret = submit_bio_wait(bio); bio_put(bio); } - return ret != -EOPNOTSUPP ? 
ret : 0; + return ret; } EXPORT_SYMBOL(blkdev_issue_write_same); @@ -193,11 +204,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, unsigned int sz; while (nr_sects != 0) { - bio = next_bio(bio, WRITE, - min(nr_sects, (sector_t)BIO_MAX_PAGES), + bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES), gfp_mask); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); while (nr_sects != 0) { sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); @@ -210,7 +221,7 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, } if (bio) { - ret = submit_bio_wait(WRITE, bio); + ret = submit_bio_wait(bio); bio_put(bio); return ret; } @@ -241,11 +252,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, bool discard) { - struct request_queue *q = bdev_get_queue(bdev); - - if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data && - blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0) == 0) - return 0; + if (discard) { + if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, + BLKDEV_DISCARD_ZERO)) + return 0; + } if (bdev_write_same(bdev) && blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, diff --git a/block/blk-map.c b/block/blk-map.c index b9f88b7751fb..b8657fa8dc9a 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -9,21 +9,26 @@ #include "blk.h" -int blk_rq_append_bio(struct request_queue *q, struct request *rq, - struct bio *bio) +/* + * Append a bio to a passthrough request. Only works if the bio can be merged + * into the request based on the driver constraints. + */ +int blk_rq_append_bio(struct request *rq, struct bio *bio) { - if (!rq->bio) - blk_rq_bio_prep(q, rq, bio); - else if (!ll_back_merge_fn(q, rq, bio)) - return -EINVAL; - else { + if (!rq->bio) { + blk_rq_bio_prep(rq->q, rq, bio); + } else { + if (!ll_back_merge_fn(rq->q, rq, bio)) + return -EINVAL; + rq->biotail->bi_next = bio; rq->biotail = bio; - rq->__data_len += bio->bi_iter.bi_size; } + return 0; } +EXPORT_SYMBOL(blk_rq_append_bio); static int __blk_rq_unmap_user(struct bio *bio) { @@ -71,7 +76,7 @@ static int __blk_rq_map_user_iov(struct request *rq, */ bio_get(bio); - ret = blk_rq_append_bio(q, rq, bio); + ret = blk_rq_append_bio(rq, bio); if (ret) { bio_endio(bio); __blk_rq_unmap_user(orig_bio); @@ -224,12 +229,12 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, return PTR_ERR(bio); if (!reading) - bio->bi_rw |= REQ_WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); if (do_copy) rq->cmd_flags |= REQ_COPY_USER; - ret = blk_rq_append_bio(q, rq, bio); + ret = blk_rq_append_bio(rq, bio); if (unlikely(ret)) { /* request is too big */ bio_put(bio); diff --git a/block/blk-merge.c b/block/blk-merge.c index 261353166dcf..41cbd4878958 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -172,9 +172,9 @@ void blk_queue_split(struct request_queue *q, struct bio **bio, struct bio *split, *res; unsigned nsegs; - if ((*bio)->bi_rw & REQ_DISCARD) + if (bio_op(*bio) == REQ_OP_DISCARD) split = blk_bio_discard_split(q, *bio, bs, &nsegs); - else if ((*bio)->bi_rw & REQ_WRITE_SAME) + else if (bio_op(*bio) == REQ_OP_WRITE_SAME) split = blk_bio_write_same_split(q, *bio, bs, &nsegs); else split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs); @@ -213,10 +213,10 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, * This should probably be
returning 0, but blk_add_request_payload() * (Christoph!!!!) */ - if (bio->bi_rw & REQ_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD) return 1; - if (bio->bi_rw & REQ_WRITE_SAME) + if (bio_op(bio) == REQ_OP_WRITE_SAME) return 1; fbio = bio; @@ -385,7 +385,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, nsegs = 0; cluster = blk_queue_cluster(q); - if (bio->bi_rw & REQ_DISCARD) { + if (bio_op(bio) == REQ_OP_DISCARD) { /* * This is a hack - drivers should be neither modifying the * biovec, nor relying on bi_vcnt - but because of @@ -400,7 +400,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, return 0; } - if (bio->bi_rw & REQ_WRITE_SAME) { + if (bio_op(bio) == REQ_OP_WRITE_SAME) { single_segment: *sg = sglist; bvec = bio_iovec(bio); @@ -439,7 +439,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, } if (q->dma_drain_size && q->dma_drain_needed(rq)) { - if (rq->cmd_flags & REQ_WRITE) + if (op_is_write(req_op(rq))) memset(q->dma_drain_buffer, 0, q->dma_drain_size); sg_unmark_end(sg); @@ -500,7 +500,7 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, integrity_req_gap_back_merge(req, bio)) return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > - blk_rq_get_max_sectors(req)) { + blk_rq_get_max_sectors(req, blk_rq_pos(req))) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -524,7 +524,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, integrity_req_gap_front_merge(req, bio)) return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > - blk_rq_get_max_sectors(req)) { + blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -570,7 +570,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, * Will it become too large? 
*/ if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > - blk_rq_get_max_sectors(req)) + blk_rq_get_max_sectors(req, blk_rq_pos(req))) return 0; total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; @@ -649,7 +649,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (!rq_mergeable(req) || !rq_mergeable(next)) return 0; - if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags)) + if (req_op(req) != req_op(next)) return 0; /* @@ -663,7 +663,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, || req_no_special_merge(next)) return 0; - if (req->cmd_flags & REQ_WRITE_SAME && + if (req_op(req) == REQ_OP_WRITE_SAME && !blk_write_same_mergeable(req->bio, next->bio)) return 0; @@ -743,6 +743,12 @@ int attempt_front_merge(struct request_queue *q, struct request *rq) int blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next) { + struct elevator_queue *e = q->elevator; + + if (e->type->ops.elevator_allow_rq_merge_fn) + if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next)) + return 0; + return attempt_merge(q, rq, next); } @@ -751,7 +757,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (!rq_mergeable(rq) || !bio_mergeable(bio)) return false; - if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw)) + if (req_op(rq) != bio_op(bio)) return false; /* different data direction or already started, don't merge */ @@ -767,7 +773,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) return false; /* must be using the same buffer */ - if (rq->cmd_flags & REQ_WRITE_SAME && + if (req_op(rq) == REQ_OP_WRITE_SAME && !blk_write_same_mergeable(rq->bio, bio)) return false; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 56a0c37a3d06..729bac3a673b 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -485,6 +485,32 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, } EXPORT_SYMBOL(blk_mq_tagset_busy_iter); +int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) +{ + int i, j, ret = 0; + + if (!set->ops->reinit_request) + goto out; + + for (i = 0; i < set->nr_hw_queues; i++) { + struct blk_mq_tags *tags = set->tags[i]; + + for (j = 0; j < tags->nr_tags; j++) { + if (!tags->rqs[j]) + continue; + + ret = set->ops->reinit_request(set->driver_data, + tags->rqs[j]); + if (ret) + goto out; + } + } + +out: + return ret; +} +EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset); + void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv) { diff --git a/block/blk-mq.c b/block/blk-mq.c index f9b9049b1284..576e7112f807 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -159,16 +159,17 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) EXPORT_SYMBOL(blk_mq_can_queue); static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, - struct request *rq, unsigned int rw_flags) + struct request *rq, int op, + unsigned int op_flags) { if (blk_queue_io_stat(q)) - rw_flags |= REQ_IO_STAT; + op_flags |= REQ_IO_STAT; INIT_LIST_HEAD(&rq->queuelist); /* csd/requeue_work/fifo_time is initialized before use */ rq->q = q; rq->mq_ctx = ctx; - rq->cmd_flags |= rw_flags; + req_set_op_attrs(rq, op, op_flags); /* do not touch atomic flags, it needs atomic ops against the timer */ rq->cpu = -1; INIT_HLIST_NODE(&rq->hash); @@ -203,11 +204,11 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, rq->end_io_data = NULL; rq->next_rq = NULL; - ctx->rq_dispatched[rw_is_sync(rw_flags)]++; + ctx->rq_dispatched[rw_is_sync(op, op_flags)]++; } 
static struct request * -__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw) +__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags) { struct request *rq; unsigned int tag; @@ -222,7 +223,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw) } rq->tag = tag; - blk_mq_rq_ctx_init(data->q, data->ctx, rq, rw); + blk_mq_rq_ctx_init(data->q, data->ctx, rq, op, op_flags); return rq; } @@ -246,7 +247,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, hctx = q->mq_ops->map_queue(q, ctx->cpu); blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw); + rq = __blk_mq_alloc_request(&alloc_data, rw, 0); if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) { __blk_mq_run_hw_queue(hctx); blk_mq_put_ctx(ctx); @@ -254,7 +255,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw); + rq = __blk_mq_alloc_request(&alloc_data, rw, 0); ctx = alloc_data.ctx; } blk_mq_put_ctx(ctx); @@ -262,10 +263,53 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, blk_queue_exit(q); return ERR_PTR(-EWOULDBLOCK); } + + rq->__data_len = 0; + rq->__sector = (sector_t) -1; + rq->bio = rq->biotail = NULL; return rq; } EXPORT_SYMBOL(blk_mq_alloc_request); +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, + unsigned int flags, unsigned int hctx_idx) +{ + struct blk_mq_hw_ctx *hctx; + struct blk_mq_ctx *ctx; + struct request *rq; + struct blk_mq_alloc_data alloc_data; + int ret; + + /* + * If the tag allocator sleeps we could get an allocation for a + * different hardware context. No need to complicate the low level + * allocator for this for the rare use case of a command tied to + * a specific queue. 
+ */ + if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT))) + return ERR_PTR(-EINVAL); + + if (hctx_idx >= q->nr_hw_queues) + return ERR_PTR(-EIO); + + ret = blk_queue_enter(q, true); + if (ret) + return ERR_PTR(ret); + + hctx = q->queue_hw_ctx[hctx_idx]; + ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask)); + + blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); + rq = __blk_mq_alloc_request(&alloc_data, rw, 0); + if (!rq) { + blk_queue_exit(q); + return ERR_PTR(-EWOULDBLOCK); + } + + return rq; +} +EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); + static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct request *rq) { @@ -784,7 +828,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) switch (ret) { case BLK_MQ_RQ_QUEUE_OK: queued++; - continue; + break; case BLK_MQ_RQ_QUEUE_BUSY: list_add(&rq->queuelist, &rq_list); __blk_mq_requeue_request(rq); @@ -1169,28 +1213,29 @@ static struct request *blk_mq_map_request(struct request_queue *q, struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; struct request *rq; - int rw = bio_data_dir(bio); + int op = bio_data_dir(bio); + int op_flags = 0; struct blk_mq_alloc_data alloc_data; blk_queue_enter_live(q); ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); - if (rw_is_sync(bio->bi_rw)) - rw |= REQ_SYNC; + if (rw_is_sync(bio_op(bio), bio->bi_rw)) + op_flags |= REQ_SYNC; - trace_block_getrq(q, bio, rw); + trace_block_getrq(q, bio, op); blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw); + rq = __blk_mq_alloc_request(&alloc_data, op, op_flags); if (unlikely(!rq)) { __blk_mq_run_hw_queue(hctx); blk_mq_put_ctx(ctx); - trace_block_sleeprq(q, bio, rw); + trace_block_sleeprq(q, bio, op); ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw); + rq = __blk_mq_alloc_request(&alloc_data, op, op_flags); ctx = alloc_data.ctx; hctx = alloc_data.hctx; } @@ -1244,8 +1289,8 @@ static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie) */ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { - const int is_sync = rw_is_sync(bio->bi_rw); - const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); + const int is_sync = rw_is_sync(bio_op(bio), bio->bi_rw); + const int is_flush_fua = bio->bi_rw & (REQ_PREFLUSH | REQ_FUA); struct blk_map_ctx data; struct request *rq; unsigned int request_count = 0; @@ -1338,8 +1383,8 @@ done: */ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) { - const int is_sync = rw_is_sync(bio->bi_rw); - const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); + const int is_sync = rw_is_sync(bio_op(bio), bio->bi_rw); + const int is_flush_fua = bio->bi_rw & (REQ_PREFLUSH | REQ_FUA); struct blk_plug *plug; unsigned int request_count = 0; struct blk_map_ctx data; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 99205965f559..f87a7e747d36 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -379,6 +379,11 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page, return count; } +static ssize_t queue_dax_show(struct request_queue *q, char *page) +{ + return queue_var_show(blk_queue_dax(q), page); +} + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .show = queue_requests_show, @@ -516,6 +521,11 @@ static struct queue_sysfs_entry 
queue_wc_entry = { .store = queue_wc_store, }; +static struct queue_sysfs_entry queue_dax_entry = { + .attr = {.name = "dax", .mode = S_IRUGO }, + .show = queue_dax_show, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -542,6 +552,7 @@ static struct attribute *default_attrs[] = { &queue_random_entry.attr, &queue_poll_entry.attr, &queue_wc_entry.attr, + &queue_dax_entry.attr, NULL, }; diff --git a/block/blk.h b/block/blk.h index 70e4aee9cdcb..c37492f5edaa 100644 --- a/block/blk.h +++ b/block/blk.h @@ -64,8 +64,6 @@ void blk_exit_rl(struct request_list *rl); void init_request_from_bio(struct request *req, struct bio *bio); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); -int blk_rq_append_bio(struct request_queue *q, struct request *rq, - struct bio *bio); void blk_queue_bypass_start(struct request_queue *q); void blk_queue_bypass_end(struct request_queue *q); void blk_dequeue_request(struct request *rq); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 4a349787bc62..acabba198de9 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -22,28 +22,28 @@ */ /* max queue in one round of service */ static const int cfq_quantum = 8; -static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; +static const u64 cfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 }; /* maximum backwards seek, in KiB */ static const int cfq_back_max = 16 * 1024; /* penalty of a backwards seek */ static const int cfq_back_penalty = 2; -static const int cfq_slice_sync = HZ / 10; -static int cfq_slice_async = HZ / 25; +static const u64 cfq_slice_sync = NSEC_PER_SEC / 10; +static u64 cfq_slice_async = NSEC_PER_SEC / 25; static const int cfq_slice_async_rq = 2; -static int cfq_slice_idle = HZ / 125; -static int cfq_group_idle = HZ / 125; -static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ +static u64 cfq_slice_idle = NSEC_PER_SEC / 125; +static u64 cfq_group_idle = NSEC_PER_SEC / 125; +static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */ static const int cfq_hist_divisor = 4; /* * offset from end of service tree */ -#define CFQ_IDLE_DELAY (HZ / 5) +#define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5) /* * below this threshold, we consider thinktime immediate */ -#define CFQ_MIN_TT (2) +#define CFQ_MIN_TT (2 * NSEC_PER_SEC / HZ) #define CFQ_SLICE_SCALE (5) #define CFQ_HW_QUEUE_MIN (5) @@ -73,11 +73,11 @@ static struct kmem_cache *cfq_pool; #define CFQ_WEIGHT_LEGACY_MAX 1000 struct cfq_ttime { - unsigned long last_end_request; + u64 last_end_request; - unsigned long ttime_total; + u64 ttime_total; + u64 ttime_mean; unsigned long ttime_samples; - unsigned long ttime_mean; }; /* @@ -94,7 +94,7 @@ struct cfq_rb_root { struct cfq_ttime ttime; }; #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \ - .ttime = {.last_end_request = jiffies,},} + .ttime = {.last_end_request = ktime_get_ns(),},} /* * Per process-grouping structure @@ -109,7 +109,7 @@ struct cfq_queue { /* service_tree member */ struct rb_node rb_node; /* service_tree key */ - unsigned long rb_key; + u64 rb_key; /* prio tree member */ struct rb_node p_node; /* prio tree root we belong to, if any */ @@ -126,13 +126,13 @@ struct cfq_queue { struct list_head fifo; /* time when queue got scheduled in to dispatch first request. 
*/ - unsigned long dispatch_start; - unsigned int allocated_slice; - unsigned int slice_dispatch; + u64 dispatch_start; + u64 allocated_slice; + u64 slice_dispatch; /* time when first request from queue completed and slice started. */ - unsigned long slice_start; - unsigned long slice_end; - long slice_resid; + u64 slice_start; + u64 slice_end; + s64 slice_resid; /* pending priority requests */ int prio_pending; @@ -141,7 +141,7 @@ struct cfq_queue { /* io prio of this group */ unsigned short ioprio, org_ioprio; - unsigned short ioprio_class; + unsigned short ioprio_class, org_ioprio_class; pid_t pid; @@ -290,7 +290,7 @@ struct cfq_group { struct cfq_rb_root service_trees[2][3]; struct cfq_rb_root service_tree_idle; - unsigned long saved_wl_slice; + u64 saved_wl_slice; enum wl_type_t saved_wl_type; enum wl_class_t saved_wl_class; @@ -329,7 +329,7 @@ struct cfq_data { */ enum wl_class_t serving_wl_class; enum wl_type_t serving_wl_type; - unsigned long workload_expires; + u64 workload_expires; struct cfq_group *serving_group; /* @@ -362,7 +362,7 @@ struct cfq_data { /* * idle window management */ - struct timer_list idle_slice_timer; + struct hrtimer idle_slice_timer; struct work_struct unplug_work; struct cfq_queue *active_queue; @@ -374,22 +374,22 @@ struct cfq_data { * tunables, see top of file */ unsigned int cfq_quantum; - unsigned int cfq_fifo_expire[2]; unsigned int cfq_back_penalty; unsigned int cfq_back_max; - unsigned int cfq_slice[2]; unsigned int cfq_slice_async_rq; - unsigned int cfq_slice_idle; - unsigned int cfq_group_idle; unsigned int cfq_latency; - unsigned int cfq_target_latency; + u64 cfq_fifo_expire[2]; + u64 cfq_slice[2]; + u64 cfq_slice_idle; + u64 cfq_group_idle; + u64 cfq_target_latency; /* * Fallback dummy cfqq for extreme OOM conditions */ struct cfq_queue oom_cfqq; - unsigned long last_delayed_sync; + u64 last_delayed_sync; }; static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); @@ -667,15 +667,16 @@ static inline void cfqg_put(struct cfq_group *cfqg) } while (0) static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, - struct cfq_group *curr_cfqg, int rw) + struct cfq_group *curr_cfqg, int op, + int op_flags) { - blkg_rwstat_add(&cfqg->stats.queued, rw, 1); + blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, 1); cfqg_stats_end_empty_time(&cfqg->stats); cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg); } static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, - unsigned long time, unsigned long unaccounted_time) + uint64_t time, unsigned long unaccounted_time) { blkg_stat_add(&cfqg->stats.time, time); #ifdef CONFIG_DEBUG_BLK_CGROUP @@ -683,26 +684,30 @@ static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, #endif } -static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) +static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op, + int op_flags) { - blkg_rwstat_add(&cfqg->stats.queued, rw, -1); + blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, -1); } -static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) +static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op, + int op_flags) { - blkg_rwstat_add(&cfqg->stats.merged, rw, 1); + blkg_rwstat_add(&cfqg->stats.merged, op, op_flags, 1); } static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, - uint64_t start_time, uint64_t io_start_time, int rw) + uint64_t start_time, uint64_t io_start_time, int op, + int op_flags) { struct cfqg_stats 
*stats = &cfqg->stats; unsigned long long now = sched_clock(); if (time_after64(now, io_start_time)) - blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); + blkg_rwstat_add(&stats->service_time, op, op_flags, + now - io_start_time); if (time_after64(io_start_time, start_time)) - blkg_rwstat_add(&stats->wait_time, rw, + blkg_rwstat_add(&stats->wait_time, op, op_flags, io_start_time - start_time); } @@ -781,13 +786,16 @@ static inline void cfqg_put(struct cfq_group *cfqg) { } #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, - struct cfq_group *curr_cfqg, int rw) { } + struct cfq_group *curr_cfqg, int op, int op_flags) { } static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, - unsigned long time, unsigned long unaccounted_time) { } -static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) { } -static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) { } + uint64_t time, unsigned long unaccounted_time) { } +static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op, + int op_flags) { } +static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op, + int op_flags) { } static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, - uint64_t start_time, uint64_t io_start_time, int rw) { } + uint64_t start_time, uint64_t io_start_time, int op, + int op_flags) { } #endif /* CONFIG_CFQ_GROUP_IOSCHED */ @@ -807,7 +815,7 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd, struct cfq_ttime *ttime, bool group_idle) { - unsigned long slice; + u64 slice; if (!sample_valid(ttime->ttime_samples)) return false; if (group_idle) @@ -930,17 +938,18 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) * if a queue is marked sync and has sync io queued. A sync queue with async * io only, should not get full sync slice length. */ -static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, +static inline u64 cfq_prio_slice(struct cfq_data *cfqd, bool sync, unsigned short prio) { - const int base_slice = cfqd->cfq_slice[sync]; + u64 base_slice = cfqd->cfq_slice[sync]; + u64 slice = div_u64(base_slice, CFQ_SLICE_SCALE); WARN_ON(prio >= IOPRIO_BE_NR); - return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio)); + return base_slice + (slice * (4 - prio)); } -static inline int +static inline u64 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) { return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); @@ -958,15 +967,14 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) * * The result is also in fixed point w/ CFQ_SERVICE_SHIFT. 
*/ -static inline u64 cfqg_scale_charge(unsigned long charge, +static inline u64 cfqg_scale_charge(u64 charge, unsigned int vfraction) { u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */ /* charge / vfraction */ c <<= CFQ_SERVICE_SHIFT; - do_div(c, vfraction); - return c; + return div_u64(c, vfraction); } static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) @@ -1019,16 +1027,16 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd, return cfqg->busy_queues_avg[rt]; } -static inline unsigned +static inline u64 cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) { return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT; } -static inline unsigned +static inline u64 cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - unsigned slice = cfq_prio_to_slice(cfqd, cfqq); + u64 slice = cfq_prio_to_slice(cfqd, cfqq); if (cfqd->cfq_latency) { /* * interested queues (we consider only the ones with the same @@ -1036,20 +1044,22 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) */ unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg, cfq_class_rt(cfqq)); - unsigned sync_slice = cfqd->cfq_slice[1]; - unsigned expect_latency = sync_slice * iq; - unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg); + u64 sync_slice = cfqd->cfq_slice[1]; + u64 expect_latency = sync_slice * iq; + u64 group_slice = cfq_group_slice(cfqd, cfqq->cfqg); if (expect_latency > group_slice) { - unsigned base_low_slice = 2 * cfqd->cfq_slice_idle; + u64 base_low_slice = 2 * cfqd->cfq_slice_idle; + u64 low_slice; + /* scale low_slice according to IO priority * and sync vs async */ - unsigned low_slice = - min(slice, base_low_slice * slice / sync_slice); + low_slice = div64_u64(base_low_slice*slice, sync_slice); + low_slice = min(slice, low_slice); /* the adapted slice value is scaled to fit all iqs * into the target latency */ - slice = max(slice * group_slice / expect_latency, - low_slice); + slice = div64_u64(slice*group_slice, expect_latency); + slice = max(slice, low_slice); } } return slice; @@ -1058,12 +1068,13 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) static inline void cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq); + u64 slice = cfq_scaled_cfqq_slice(cfqd, cfqq); + u64 now = ktime_get_ns(); - cfqq->slice_start = jiffies; - cfqq->slice_end = jiffies + slice; + cfqq->slice_start = now; + cfqq->slice_end = now + slice; cfqq->allocated_slice = slice; - cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); + cfq_log_cfqq(cfqd, cfqq, "set_slice=%llu", cfqq->slice_end - now); } /* @@ -1075,7 +1086,7 @@ static inline bool cfq_slice_used(struct cfq_queue *cfqq) { if (cfq_cfqq_slice_new(cfqq)) return false; - if (time_before(jiffies, cfqq->slice_end)) + if (ktime_get_ns() < cfqq->slice_end) return false; return true; @@ -1241,8 +1252,8 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq, return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last)); } -static unsigned long cfq_slice_offset(struct cfq_data *cfqd, - struct cfq_queue *cfqq) +static u64 cfq_slice_offset(struct cfq_data *cfqd, + struct cfq_queue *cfqq) { /* * just an approximation, should be ok. 
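Since CFQ now keeps slices in nanoseconds rather than jiffies, its bookkeeping is done on u64 and divisions go through div_u64()/div64_u64()-style helpers that also work on 32-bit targets. A minimal userspace sketch of the cfq_prio_slice() arithmetic, with the helper reduced to a plain division and constants taken from the tunables above (IOPRIO_BE_NR assumed to be 8):

#include <inttypes.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL
#define CFQ_SLICE_SCALE	5
#define IOPRIO_BE_NR	8

/* Userspace stand-in; the kernel helper divides a u64 by a u32 so that
 * 32-bit architectures avoid a full 64/64 libgcc division. */
static uint64_t div_u64(uint64_t dividend, uint32_t divisor)
{
	return dividend / divisor;
}

/*
 * Mirrors cfq_prio_slice(): a numerically lower (better) priority earns
 * a longer slice. (4 - prio) can go negative, but the unsigned
 * wraparound still yields the correct, smaller slice because the final
 * value stays positive.
 */
static uint64_t prio_slice(uint64_t base_slice, unsigned short prio)
{
	uint64_t slice = div_u64(base_slice, CFQ_SLICE_SCALE);

	return base_slice + slice * (4 - prio);
}

int main(void)
{
	uint64_t sync_base = NSEC_PER_SEC / 10;	/* cfq_slice_sync above */
	unsigned short prio;

	for (prio = 0; prio < IOPRIO_BE_NR; prio++)
		printf("prio %u -> %" PRIu64 " ns\n",
		       (unsigned)prio, prio_slice(sync_base, prio));
	return 0;
}

For the best-effort class this gives 180 ms at priority 0 down to 40 ms at priority 7 — the same proportions the jiffies-based code produced, just without HZ-granularity rounding.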
@@ -1435,31 +1446,32 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) cfqg_stats_update_dequeue(cfqg); } -static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, - unsigned int *unaccounted_time) +static inline u64 cfq_cfqq_slice_usage(struct cfq_queue *cfqq, + u64 *unaccounted_time) { - unsigned int slice_used; + u64 slice_used; + u64 now = ktime_get_ns(); /* * Queue got expired before even a single request completed or * got expired immediately after first request completion. */ - if (!cfqq->slice_start || cfqq->slice_start == jiffies) { + if (!cfqq->slice_start || cfqq->slice_start == now) { /* * Also charge the seek time incurred to the group, otherwise * if there are multiple queues in the group, each can dispatch * a single request on seeky media and cause lots of seek time * and group will never know it. */ - slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start), - 1); + slice_used = max_t(u64, (now - cfqq->dispatch_start), + jiffies_to_nsecs(1)); } else { - slice_used = jiffies - cfqq->slice_start; + slice_used = now - cfqq->slice_start; if (slice_used > cfqq->allocated_slice) { *unaccounted_time = slice_used - cfqq->allocated_slice; slice_used = cfqq->allocated_slice; } - if (time_after(cfqq->slice_start, cfqq->dispatch_start)) + if (cfqq->slice_start > cfqq->dispatch_start) *unaccounted_time += cfqq->slice_start - cfqq->dispatch_start; } @@ -1471,10 +1483,11 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, struct cfq_queue *cfqq) { struct cfq_rb_root *st = &cfqd->grp_service_tree; - unsigned int used_sl, charge, unaccounted_sl = 0; + u64 used_sl, charge, unaccounted_sl = 0; int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) - cfqg->service_tree_idle.count; unsigned int vfr; + u64 now = ktime_get_ns(); BUG_ON(nr_sync < 0); used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); @@ -1496,9 +1509,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_group_service_tree_add(st, cfqg); /* This group is being expired.
Save the context */ - if (time_after(cfqd->workload_expires, jiffies)) { - cfqg->saved_wl_slice = cfqd->workload_expires - - jiffies; + if (cfqd->workload_expires > now) { + cfqg->saved_wl_slice = cfqd->workload_expires - now; cfqg->saved_wl_type = cfqd->serving_wl_type; cfqg->saved_wl_class = cfqd->serving_wl_class; } else @@ -1507,7 +1519,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); cfq_log_cfqq(cfqq->cfqd, cfqq, - "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", + "sl_used=%llu disp=%llu charge=%llu iops=%u sect=%lu", used_sl, cfqq->slice_dispatch, charge, iops_mode(cfqd), cfqq->nr_sectors); cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl); @@ -1530,7 +1542,7 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg) *st = CFQ_RB_ROOT; RB_CLEAR_NODE(&cfqg->rb_node); - cfqg->ttime.last_end_request = jiffies; + cfqg->ttime.last_end_request = ktime_get_ns(); } #ifdef CONFIG_CFQ_GROUP_IOSCHED @@ -2213,10 +2225,11 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, { struct rb_node **p, *parent; struct cfq_queue *__cfqq; - unsigned long rb_key; + u64 rb_key; struct cfq_rb_root *st; int left; int new_cfqq = 1; + u64 now = ktime_get_ns(); st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq)); if (cfq_class_idle(cfqq)) { @@ -2226,7 +2239,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, __cfqq = rb_entry(parent, struct cfq_queue, rb_node); rb_key += __cfqq->rb_key; } else - rb_key += jiffies; + rb_key += now; } else if (!add_front) { /* * Get our rb key offset. Subtract any residual slice @@ -2234,13 +2247,13 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, * count indicates slice overrun, and this should position * the next service time further away in the tree. */ - rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; + rb_key = cfq_slice_offset(cfqd, cfqq) + now; rb_key -= cfqq->slice_resid; cfqq->slice_resid = 0; } else { - rb_key = -HZ; + rb_key = -NSEC_PER_SEC; __cfqq = cfq_rb_first(st); - rb_key += __cfqq ? __cfqq->rb_key : jiffies; + rb_key += __cfqq ? __cfqq->rb_key : now; } if (!RB_EMPTY_NODE(&cfqq->rb_node)) { @@ -2266,7 +2279,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, /* * sort by key, that represents service time. 
*/ - if (time_before(rb_key, __cfqq->rb_key)) + if (rb_key < __cfqq->rb_key) p = &parent->rb_left; else { p = &parent->rb_right; @@ -2461,10 +2474,10 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) { elv_rb_del(&cfqq->sort_list, rq); cfqq->queued[rq_is_sync(rq)]--; - cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); + cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags); cfq_add_rq_rb(rq); cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group, - rq->cmd_flags); + req_op(rq), rq->cmd_flags); } static struct request * @@ -2517,7 +2530,7 @@ static void cfq_remove_request(struct request *rq) cfq_del_rq_rb(rq); cfqq->cfqd->rq_queued--; - cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); + cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags); if (rq->cmd_flags & REQ_PRIO) { WARN_ON(!cfqq->prio_pending); cfqq->prio_pending--; @@ -2531,7 +2544,7 @@ static int cfq_merge(struct request_queue *q, struct request **req, struct request *__rq; __rq = cfq_find_rq_fmerge(cfqd, bio); - if (__rq && elv_rq_merge_ok(__rq, bio)) { + if (__rq && elv_bio_merge_ok(__rq, bio)) { *req = __rq; return ELEVATOR_FRONT_MERGE; } @@ -2552,7 +2565,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req, static void cfq_bio_merged(struct request_queue *q, struct request *req, struct bio *bio) { - cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_rw); + cfqg_stats_update_io_merged(RQ_CFQG(req), bio_op(bio), bio->bi_rw); } static void @@ -2566,7 +2579,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, * reposition in fifo if next is older than rq */ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && - time_before(next->fifo_time, rq->fifo_time) && + next->fifo_time < rq->fifo_time && cfqq == RQ_CFQQ(next)) { list_move(&rq->queuelist, &next->queuelist); rq->fifo_time = next->fifo_time; @@ -2575,7 +2588,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, if (cfqq->next_rq == next) cfqq->next_rq = rq; cfq_remove_request(next); - cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags); + cfqg_stats_update_io_merged(RQ_CFQG(rq), req_op(next), next->cmd_flags); cfqq = RQ_CFQQ(next); /* @@ -2588,8 +2601,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, cfq_del_cfqq_rr(cfqd, cfqq); } -static int cfq_allow_merge(struct request_queue *q, struct request *rq, - struct bio *bio) +static int cfq_allow_bio_merge(struct request_queue *q, struct request *rq, + struct bio *bio) { struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_io_cq *cic; @@ -2613,9 +2626,15 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, return cfqq == RQ_CFQQ(rq); } +static int cfq_allow_rq_merge(struct request_queue *q, struct request *rq, + struct request *next) +{ + return RQ_CFQQ(rq) == RQ_CFQQ(next); +} + static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - del_timer(&cfqd->idle_slice_timer); + hrtimer_try_to_cancel(&cfqd->idle_slice_timer); cfqg_stats_update_idle_time(cfqq->cfqg); } @@ -2627,7 +2646,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, cfqd->serving_wl_class, cfqd->serving_wl_type); cfqg_stats_update_avg_queue_size(cfqq->cfqg); cfqq->slice_start = 0; - cfqq->dispatch_start = jiffies; + cfqq->dispatch_start = ktime_get_ns(); cfqq->allocated_slice = 0; cfqq->slice_end = 0; cfqq->slice_dispatch = 0; @@ -2676,8 +2695,8 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, if 
(cfq_cfqq_slice_new(cfqq)) cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq); else - cfqq->slice_resid = cfqq->slice_end - jiffies; - cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); + cfqq->slice_resid = cfqq->slice_end - ktime_get_ns(); + cfq_log_cfqq(cfqd, cfqq, "resid=%lld", cfqq->slice_resid); } cfq_group_served(cfqd, cfqq->cfqg, cfqq); @@ -2911,7 +2930,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) struct cfq_queue *cfqq = cfqd->active_queue; struct cfq_rb_root *st = cfqq->service_tree; struct cfq_io_cq *cic; - unsigned long sl, group_idle = 0; + u64 sl, group_idle = 0; + u64 now = ktime_get_ns(); /* * SSD device without seek penalty, disable idling. But only do so @@ -2954,8 +2974,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) * time slice. */ if (sample_valid(cic->ttime.ttime_samples) && - (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) { - cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", + (cfqq->slice_end - now < cic->ttime.ttime_mean)) { + cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%llu", cic->ttime.ttime_mean); return; } @@ -2976,9 +2996,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) else sl = cfqd->cfq_slice_idle; - mod_timer(&cfqd->idle_slice_timer, jiffies + sl); + hrtimer_start(&cfqd->idle_slice_timer, ns_to_ktime(sl), + HRTIMER_MODE_REL); cfqg_stats_set_start_idle_time(cfqq->cfqg); - cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, + cfq_log_cfqq(cfqd, cfqq, "arm_idle: %llu group_idle: %d", sl, group_idle ? 1 : 0); } @@ -3018,7 +3039,7 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq) return NULL; rq = rq_entry_fifo(cfqq->fifo.next); - if (time_before(jiffies, rq->fifo_time)) + if (ktime_get_ns() < rq->fifo_time) rq = NULL; cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); @@ -3096,14 +3117,14 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd, struct cfq_queue *queue; int i; bool key_valid = false; - unsigned long lowest_key = 0; + u64 lowest_key = 0; enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; for (i = 0; i <= SYNC_WORKLOAD; ++i) { /* select the one with lowest rb_key */ queue = cfq_rb_first(st_for(cfqg, wl_class, i)); if (queue && - (!key_valid || time_before(queue->rb_key, lowest_key))) { + (!key_valid || queue->rb_key < lowest_key)) { lowest_key = queue->rb_key; cur_best = i; key_valid = true; @@ -3116,11 +3137,12 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd, static void choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) { - unsigned slice; + u64 slice; unsigned count; struct cfq_rb_root *st; - unsigned group_slice; + u64 group_slice; enum wl_class_t original_class = cfqd->serving_wl_class; + u64 now = ktime_get_ns(); /* Choose next priority. 
RT > BE > IDLE */ if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) @@ -3129,7 +3151,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) cfqd->serving_wl_class = BE_WORKLOAD; else { cfqd->serving_wl_class = IDLE_WORKLOAD; - cfqd->workload_expires = jiffies + 1; + cfqd->workload_expires = now + jiffies_to_nsecs(1); return; } @@ -3147,7 +3169,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) /* * check workload expiration, and that we still have other queues ready */ - if (count && !time_after(jiffies, cfqd->workload_expires)) + if (count && !(now > cfqd->workload_expires)) return; new_workload: @@ -3164,13 +3186,13 @@ new_workload: */ group_slice = cfq_group_slice(cfqd, cfqg); - slice = group_slice * count / + slice = div_u64(group_slice * count, max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class], cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd, - cfqg)); + cfqg))); if (cfqd->serving_wl_type == ASYNC_WORKLOAD) { - unsigned int tmp; + u64 tmp; /* * Async queues are currently system wide. Just taking @@ -3181,19 +3203,19 @@ new_workload: */ tmp = cfqd->cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg); - tmp = tmp/cfqd->busy_queues; - slice = min_t(unsigned, slice, tmp); + tmp = div_u64(tmp, cfqd->busy_queues); + slice = min_t(u64, slice, tmp); /* async workload slice is scaled down according to * the sync/async slice ratio. */ - slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1]; + slice = div64_u64(slice*cfqd->cfq_slice[0], cfqd->cfq_slice[1]); } else /* sync workload slice is at least 2 * cfq_slice_idle */ slice = max(slice, 2 * cfqd->cfq_slice_idle); - slice = max_t(unsigned, slice, CFQ_MIN_TT); - cfq_log(cfqd, "workload slice:%d", slice); - cfqd->workload_expires = jiffies + slice; + slice = max_t(u64, slice, CFQ_MIN_TT); + cfq_log(cfqd, "workload slice:%llu", slice); + cfqd->workload_expires = now + slice; } static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) @@ -3211,16 +3233,17 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) static void cfq_choose_cfqg(struct cfq_data *cfqd) { struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd); + u64 now = ktime_get_ns(); cfqd->serving_group = cfqg; /* Restore the workload type data */ if (cfqg->saved_wl_slice) { - cfqd->workload_expires = jiffies + cfqg->saved_wl_slice; + cfqd->workload_expires = now + cfqg->saved_wl_slice; cfqd->serving_wl_type = cfqg->saved_wl_type; cfqd->serving_wl_class = cfqg->saved_wl_class; } else - cfqd->workload_expires = jiffies - 1; + cfqd->workload_expires = now - 1; choose_wl_class_and_type(cfqd, cfqg); } @@ -3232,6 +3255,7 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd) static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) { struct cfq_queue *cfqq, *new_cfqq = NULL; + u64 now = ktime_get_ns(); cfqq = cfqd->active_queue; if (!cfqq) @@ -3292,7 +3316,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) * flight or is idling for a new request, allow either of these * conditions to happen (or time out) before selecting a new queue. 
*/ - if (timer_pending(&cfqd->idle_slice_timer)) { + if (hrtimer_active(&cfqd->idle_slice_timer)) { cfqq = NULL; goto keep_queue; } @@ -3303,7 +3327,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) **/ if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && (cfq_cfqq_slice_new(cfqq) || - (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { + (cfqq->slice_end - now > now - cfqq->slice_start))) { cfq_clear_cfqq_deep(cfqq); cfq_clear_cfqq_idle_window(cfqq); } @@ -3381,11 +3405,12 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, struct cfq_queue *cfqq) { + u64 now = ktime_get_ns(); + /* the queue hasn't finished any request, can't estimate */ if (cfq_cfqq_slice_new(cfqq)) return true; - if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, - cfqq->slice_end)) + if (now + cfqd->cfq_slice_idle * cfqq->dispatched > cfqq->slice_end) return true; return false; @@ -3460,10 +3485,10 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) * based on the last sync IO we serviced */ if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { - unsigned long last_sync = jiffies - cfqd->last_delayed_sync; + u64 last_sync = ktime_get_ns() - cfqd->last_delayed_sync; unsigned int depth; - depth = last_sync / cfqd->cfq_slice[1]; + depth = div64_u64(last_sync, cfqd->cfq_slice[1]); if (!depth && !cfqq->dispatched) depth = 1; if (depth < max_dispatch) @@ -3546,7 +3571,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || cfq_class_idle(cfqq))) { - cfqq->slice_end = jiffies + 1; + cfqq->slice_end = ktime_get_ns() + 1; cfq_slice_expired(cfqd, 0); } @@ -3624,7 +3649,7 @@ static void cfq_init_icq(struct io_cq *icq) { struct cfq_io_cq *cic = icq_to_cic(icq); - cic->ttime.last_end_request = jiffies; + cic->ttime.last_end_request = ktime_get_ns(); } static void cfq_exit_icq(struct io_cq *icq) @@ -3682,6 +3707,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic) * elevate the priority of this queue */ cfqq->org_ioprio = cfqq->ioprio; + cfqq->org_ioprio_class = cfqq->ioprio_class; cfq_clear_cfqq_prio_changed(cfqq); } @@ -3845,14 +3871,15 @@ out: } static void -__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) +__cfq_update_io_thinktime(struct cfq_ttime *ttime, u64 slice_idle) { - unsigned long elapsed = jiffies - ttime->last_end_request; + u64 elapsed = ktime_get_ns() - ttime->last_end_request; elapsed = min(elapsed, 2UL * slice_idle); ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8; - ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8; - ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples; + ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8); + ttime->ttime_mean = div64_ul(ttime->ttime_total + 128, + ttime->ttime_samples); } static void @@ -4105,10 +4132,10 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) cfq_log_cfqq(cfqd, cfqq, "insert_request"); cfq_init_prio_data(cfqq, RQ_CIC(rq)); - rq->fifo_time = jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]; + rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)]; list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_add_rq_rb(rq); - cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, + cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, req_op(rq), 
rq->cmd_flags); cfq_rq_enqueued(cfqd, cfqq, rq); } @@ -4153,6 +4180,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd) static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) { struct cfq_io_cq *cic = cfqd->active_cic; + u64 now = ktime_get_ns(); /* If the queue already has requests, don't wait */ if (!RB_EMPTY_ROOT(&cfqq->sort_list)) @@ -4171,7 +4199,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) /* if slice left is less than think time, wait busy */ if (cic && sample_valid(cic->ttime.ttime_samples) - && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) + && (cfqq->slice_end - now < cic->ttime.ttime_mean)) return true; /* @@ -4181,7 +4209,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) * case where think time is less than a jiffy, mark the queue wait * busy if only 1 jiffy is left in the slice. */ - if (cfqq->slice_end - jiffies == 1) + if (cfqq->slice_end - now <= jiffies_to_nsecs(1)) return true; return false; @@ -4192,9 +4220,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) struct cfq_queue *cfqq = RQ_CFQQ(rq); struct cfq_data *cfqd = cfqq->cfqd; const int sync = rq_is_sync(rq); - unsigned long now; + u64 now = ktime_get_ns(); - now = jiffies; cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", !!(rq->cmd_flags & REQ_NOIDLE)); @@ -4206,7 +4233,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqq->dispatched--; (RQ_CFQG(rq))->dispatched--; cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq), - rq_io_start_time_ns(rq), rq->cmd_flags); + rq_io_start_time_ns(rq), req_op(rq), + rq->cmd_flags); cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; @@ -4222,7 +4250,16 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqq_type(cfqq)); st->ttime.last_end_request = now; - if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) + /* + * We have to do this check in jiffies since start_time is in + * jiffies and it is not trivial to convert to ns. If + * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test + * will become problematic but so far we are fine (the default + * is 128 ms). + */ + if (!time_after(rq->start_time + + nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]), + jiffies)) cfqd->last_delayed_sync = now; } @@ -4247,10 +4284,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) * the queue. */ if (cfq_should_wait_busy(cfqd, cfqq)) { - unsigned long extend_sl = cfqd->cfq_slice_idle; + u64 extend_sl = cfqd->cfq_slice_idle; if (!cfqd->cfq_slice_idle) extend_sl = cfqd->cfq_group_idle; - cfqq->slice_end = jiffies + extend_sl; + cfqq->slice_end = now + extend_sl; cfq_mark_cfqq_wait_busy(cfqq); cfq_log_cfqq(cfqd, cfqq, "will busy wait"); } @@ -4275,6 +4312,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_schedule_dispatch(cfqd); } +static void cfqq_boost_on_prio(struct cfq_queue *cfqq, int op_flags) +{ + /* + * If REQ_PRIO is set, boost class and prio level, if it's below + * BE/NORM. If prio is not set, restore the potentially boosted + * class/prio level. 
+ */ + if (!(op_flags & REQ_PRIO)) { + cfqq->ioprio_class = cfqq->org_ioprio_class; + cfqq->ioprio = cfqq->org_ioprio; + } else { + if (cfq_class_idle(cfqq)) + cfqq->ioprio_class = IOPRIO_CLASS_BE; + if (cfqq->ioprio > IOPRIO_NORM) + cfqq->ioprio = IOPRIO_NORM; + } +} + static inline int __cfq_may_queue(struct cfq_queue *cfqq) { if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { @@ -4285,7 +4340,7 @@ static inline int __cfq_may_queue(struct cfq_queue *cfqq) return ELV_MQUEUE_MAY; } -static int cfq_may_queue(struct request_queue *q, int rw) +static int cfq_may_queue(struct request_queue *q, int op, int op_flags) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; @@ -4302,9 +4357,10 @@ static int cfq_may_queue(struct request_queue *q, int rw) if (!cic) return ELV_MQUEUE_MAY; - cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); + cfqq = cic_to_cfqq(cic, rw_is_sync(op, op_flags)); if (cfqq) { cfq_init_prio_data(cfqq, cic); + cfqq_boost_on_prio(cfqq, op_flags); return __cfq_may_queue(cfqq); } @@ -4435,9 +4491,10 @@ static void cfq_kick_queue(struct work_struct *work) /* * Timer running if the active_queue is currently idling inside its time slice */ -static void cfq_idle_slice_timer(unsigned long data) +static enum hrtimer_restart cfq_idle_slice_timer(struct hrtimer *timer) { - struct cfq_data *cfqd = (struct cfq_data *) data; + struct cfq_data *cfqd = container_of(timer, struct cfq_data, + idle_slice_timer); struct cfq_queue *cfqq; unsigned long flags; int timed_out = 1; @@ -4486,11 +4543,12 @@ out_kick: cfq_schedule_dispatch(cfqd); out_cont: spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); + return HRTIMER_NORESTART; } static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) { - del_timer_sync(&cfqd->idle_slice_timer); + hrtimer_cancel(&cfqd->idle_slice_timer); cancel_work_sync(&cfqd->unplug_work); } @@ -4586,9 +4644,9 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e) cfqg_put(cfqd->root_group); spin_unlock_irq(q->queue_lock); - init_timer(&cfqd->idle_slice_timer); + hrtimer_init(&cfqd->idle_slice_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); cfqd->idle_slice_timer.function = cfq_idle_slice_timer; - cfqd->idle_slice_timer.data = (unsigned long) cfqd; INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); @@ -4609,7 +4667,7 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e) * we optimistically start assuming sync ops weren't delayed in last * second, in order to have larger depth for async operations. 
*/ - cfqd->last_delayed_sync = jiffies - HZ; + cfqd->last_delayed_sync = ktime_get_ns() - NSEC_PER_SEC; return 0; out_free: @@ -4652,9 +4710,9 @@ cfq_var_store(unsigned int *var, const char *page, size_t count) static ssize_t __FUNC(struct elevator_queue *e, char *page) \ { \ struct cfq_data *cfqd = e->elevator_data; \ - unsigned int __data = __VAR; \ + u64 __data = __VAR; \ if (__CONV) \ - __data = jiffies_to_msecs(__data); \ + __data = div_u64(__data, NSEC_PER_MSEC); \ return cfq_var_show(__data, (page)); \ } SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); @@ -4671,6 +4729,21 @@ SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1); #undef SHOW_FUNCTION +#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct cfq_data *cfqd = e->elevator_data; \ + u64 __data = __VAR; \ + __data = div_u64(__data, NSEC_PER_USEC); \ + return cfq_var_show(__data, (page)); \ +} +USEC_SHOW_FUNCTION(cfq_slice_idle_us_show, cfqd->cfq_slice_idle); +USEC_SHOW_FUNCTION(cfq_group_idle_us_show, cfqd->cfq_group_idle); +USEC_SHOW_FUNCTION(cfq_slice_sync_us_show, cfqd->cfq_slice[1]); +USEC_SHOW_FUNCTION(cfq_slice_async_us_show, cfqd->cfq_slice[0]); +USEC_SHOW_FUNCTION(cfq_target_latency_us_show, cfqd->cfq_target_latency); +#undef USEC_SHOW_FUNCTION + #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ { \ @@ -4682,7 +4755,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) else if (__data > (MAX)) \ __data = (MAX); \ if (__CONV) \ - *(__PTR) = msecs_to_jiffies(__data); \ + *(__PTR) = (u64)__data * NSEC_PER_MSEC; \ else \ *(__PTR) = __data; \ return ret; \ @@ -4705,6 +4778,26 @@ STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1); #undef STORE_FUNCTION +#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ +{ \ + struct cfq_data *cfqd = e->elevator_data; \ + unsigned int __data; \ + int ret = cfq_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + *(__PTR) = (u64)__data * NSEC_PER_USEC; \ + return ret; \ +} +USEC_STORE_FUNCTION(cfq_slice_idle_us_store, &cfqd->cfq_slice_idle, 0, UINT_MAX); +USEC_STORE_FUNCTION(cfq_group_idle_us_store, &cfqd->cfq_group_idle, 0, UINT_MAX); +USEC_STORE_FUNCTION(cfq_slice_sync_us_store, &cfqd->cfq_slice[1], 1, UINT_MAX); +USEC_STORE_FUNCTION(cfq_slice_async_us_store, &cfqd->cfq_slice[0], 1, UINT_MAX); +USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, UINT_MAX); +#undef USEC_STORE_FUNCTION + #define CFQ_ATTR(name) \ __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store) @@ -4715,12 +4808,17 @@ static struct elv_fs_entry cfq_attrs[] = { CFQ_ATTR(back_seek_max), CFQ_ATTR(back_seek_penalty), CFQ_ATTR(slice_sync), + CFQ_ATTR(slice_sync_us), CFQ_ATTR(slice_async), + CFQ_ATTR(slice_async_us), CFQ_ATTR(slice_async_rq), CFQ_ATTR(slice_idle), + CFQ_ATTR(slice_idle_us), CFQ_ATTR(group_idle), + CFQ_ATTR(group_idle_us), CFQ_ATTR(low_latency), CFQ_ATTR(target_latency), + CFQ_ATTR(target_latency_us), __ATTR_NULL }; @@ -4729,7 +4827,8 @@ static struct elevator_type iosched_cfq = { .elevator_merge_fn = cfq_merge, .elevator_merged_fn = 
cfq_merged_request, .elevator_merge_req_fn = cfq_merged_requests, - .elevator_allow_merge_fn = cfq_allow_merge, + .elevator_allow_bio_merge_fn = cfq_allow_bio_merge, + .elevator_allow_rq_merge_fn = cfq_allow_rq_merge, .elevator_bio_merged_fn = cfq_bio_merged, .elevator_dispatch_fn = cfq_dispatch_requests, .elevator_add_req_fn = cfq_insert_request, @@ -4776,18 +4875,7 @@ static int __init cfq_init(void) { int ret; - /* - * could be 0 on HZ < 1000 setups - */ - if (!cfq_slice_async) - cfq_slice_async = 1; - if (!cfq_slice_idle) - cfq_slice_idle = 1; - #ifdef CONFIG_CFQ_GROUP_IOSCHED - if (!cfq_group_idle) - cfq_group_idle = 1; - ret = blkcg_policy_register(&blkcg_policy_cfq); if (ret) return ret; diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index d0dd7882d8c7..55e0bb6d7da7 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -137,7 +137,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio) if (__rq) { BUG_ON(sector != blk_rq_pos(__rq)); - if (elv_rq_merge_ok(__rq, bio)) { + if (elv_bio_merge_ok(__rq, bio)) { ret = ELEVATOR_FRONT_MERGE; goto out; } @@ -173,7 +173,8 @@ deadline_merged_requests(struct request_queue *q, struct request *req, * and move into next position (next will be deleted) in fifo */ if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { - if (time_before(next->fifo_time, req->fifo_time)) { + if (time_before((unsigned long)next->fifo_time, + (unsigned long)req->fifo_time)) { list_move(&req->queuelist, &next->queuelist); req->fifo_time = next->fifo_time; } @@ -227,7 +228,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) /* * rq is expired! */ - if (time_after_eq(jiffies, rq->fifo_time)) + if (time_after_eq(jiffies, (unsigned long)rq->fifo_time)) return 1; return 0; diff --git a/block/elevator.c b/block/elevator.c index c3555c9c672f..7096c22041e7 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -53,13 +53,13 @@ static LIST_HEAD(elv_list); * Query io scheduler to see if the current process issuing bio may be * merged with rq. */ -static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) +static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio) { struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_allow_merge_fn) - return e->type->ops.elevator_allow_merge_fn(q, rq, bio); + if (e->type->ops.elevator_allow_bio_merge_fn) + return e->type->ops.elevator_allow_bio_merge_fn(q, rq, bio); return 1; } @@ -67,17 +67,17 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) /* * can we safely merge with this request? 
*/ -bool elv_rq_merge_ok(struct request *rq, struct bio *bio) +bool elv_bio_merge_ok(struct request *rq, struct bio *bio) { if (!blk_rq_merge_ok(rq, bio)) - return 0; + return false; - if (!elv_iosched_allow_merge(rq, bio)) - return 0; + if (!elv_iosched_allow_bio_merge(rq, bio)) + return false; - return 1; + return true; } -EXPORT_SYMBOL(elv_rq_merge_ok); +EXPORT_SYMBOL(elv_bio_merge_ok); static struct elevator_type *elevator_find(const char *name) { @@ -366,8 +366,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); - if ((rq->cmd_flags & REQ_DISCARD) != - (pos->cmd_flags & REQ_DISCARD)) + if ((req_op(rq) == REQ_OP_DISCARD) != (req_op(pos) == REQ_OP_DISCARD)) break; if (rq_data_dir(rq) != rq_data_dir(pos)) break; @@ -426,7 +425,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) /* * First try one-hit cache. */ - if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) { + if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) { ret = blk_try_merge(q->last_merge, bio); if (ret != ELEVATOR_NO_MERGE) { *req = q->last_merge; @@ -441,7 +440,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) * See if our hash lookup can find a potential backmerge. */ __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector); - if (__rq && elv_rq_merge_ok(__rq, bio)) { + if (__rq && elv_bio_merge_ok(__rq, bio)) { *req = __rq; return ELEVATOR_BACK_MERGE; } @@ -717,12 +716,12 @@ void elv_put_request(struct request_queue *q, struct request *rq) e->type->ops.elevator_put_req_fn(rq); } -int elv_may_queue(struct request_queue *q, int rw) +int elv_may_queue(struct request_queue *q, int op, int op_flags) { struct elevator_queue *e = q->elevator; if (e->type->ops.elevator_may_queue_fn) - return e->type->ops.elevator_may_queue_fn(q, rw); + return e->type->ops.elevator_may_queue_fn(q, op, op_flags); return ELV_MQUEUE_MAY; } diff --git a/block/genhd.c b/block/genhd.c index 9f42526b4d62..3c9dede4e04f 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -506,7 +506,7 @@ static int exact_lock(dev_t devt, void *data) return 0; } -static void register_disk(struct gendisk *disk) +static void register_disk(struct device *parent, struct gendisk *disk) { struct device *ddev = disk_to_dev(disk); struct block_device *bdev; @@ -514,7 +514,7 @@ static void register_disk(struct gendisk *disk) struct hd_struct *part; int err; - ddev->parent = disk->driverfs_dev; + ddev->parent = parent; dev_set_name(ddev, "%s", disk->disk_name); @@ -573,7 +573,8 @@ exit: } /** - * add_disk - add partitioning information to kernel list + * device_add_disk - add partitioning information to kernel list + * @parent: parent device for the disk * @disk: per-device partitioning information * * This function registers the partitioning information in @disk @@ -581,7 +582,7 @@ exit: * * FIXME: error handling */ -void add_disk(struct gendisk *disk) +void device_add_disk(struct device *parent, struct gendisk *disk) { struct backing_dev_info *bdi; dev_t devt; @@ -617,7 +618,7 @@ void add_disk(struct gendisk *disk) blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); - register_disk(disk); + register_disk(parent, disk); blk_register_queue(disk); /* @@ -633,7 +634,7 @@ void add_disk(struct gendisk *disk) disk_add_events(disk); blk_integrity_add(disk); } -EXPORT_SYMBOL(add_disk); +EXPORT_SYMBOL(device_add_disk); void del_gendisk(struct gendisk *disk) { @@ -799,10 +800,9 
@@ void __init printk_all_partitions(void) , disk_name(disk, part->partno, name_buf), part->info ? part->info->uuid : ""); if (is_part0) { - if (disk->driverfs_dev != NULL && - disk->driverfs_dev->driver != NULL) + if (dev->parent && dev->parent->driver) printk(" driver: %s\n", - disk->driverfs_dev->driver->name); + dev->parent->driver->name); else printk(" (driver?)\n"); } else @@ -1523,12 +1523,7 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now) if (--ev->block) goto out_unlock; - /* - * Not exactly a latency critical operation, set poll timer - * slack to 25% and kick event check. - */ intv = disk_events_poll_jiffies(disk); - set_timer_slack(&ev->dwork.timer, intv / 4); if (check_now) queue_delayed_work(system_freezable_power_efficient_wq, &ev->dwork, 0); diff --git a/block/ioprio.c b/block/ioprio.c index cc7800e9eb44..01b8116298a1 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -150,8 +150,10 @@ static int get_task_ioprio(struct task_struct *p) if (ret) goto out; ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM); + task_lock(p); if (p->io_context) ret = p->io_context->ioprio; + task_unlock(p); out: return ret; } diff --git a/block/partition-generic.c b/block/partition-generic.c index d7eb77e1e3a8..71d9ed9df8da 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -495,7 +495,6 @@ rescan: /* add partitions */ for (p = 1; p < state->limit; p++) { sector_t size, from; - struct partition_meta_info *info = NULL; size = state->parts[p].size; if (!size) @@ -530,8 +529,6 @@ rescan: } } - if (state->parts[p].has_info) - info = &state->parts[p].info; part = add_partition(disk, p, from, size, state->parts[p].flags, &state->parts[p].info); diff --git a/block/partitions/atari.c b/block/partitions/atari.c index 9875b05e80a2..ff1fb93712c1 100644 --- a/block/partitions/atari.c +++ b/block/partitions/atari.c @@ -42,6 +42,13 @@ int atari_partition(struct parsed_partitions *state) int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */ #endif + /* + * ATARI partition scheme supports 512 lba only. If this is not + * the case, bail early to avoid miscalculating hd_size. + */ + if (bdev_logical_block_size(state->bdev) != 512) + return 0; + rs = read_part_sector(state, 0, &sect); if (!rs) return -1; diff --git a/crypto/Kconfig b/crypto/Kconfig index 1d33beb6a1ae..a9377bef25e3 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -93,6 +93,15 @@ config CRYPTO_AKCIPHER select CRYPTO_AKCIPHER2 select CRYPTO_ALGAPI +config CRYPTO_KPP2 + tristate + select CRYPTO_ALGAPI2 + +config CRYPTO_KPP + tristate + select CRYPTO_ALGAPI + select CRYPTO_KPP2 + config CRYPTO_RSA tristate "RSA algorithm" select CRYPTO_AKCIPHER @@ -102,6 +111,19 @@ config CRYPTO_RSA help Generic implementation of the RSA public key algorithm. +config CRYPTO_DH + tristate "Diffie-Hellman algorithm" + select CRYPTO_KPP + select MPILIB + help + Generic implementation of the Diffie-Hellman algorithm. + +config CRYPTO_ECDH + tristate "ECDH algorithm" + select CRYPTO_KPP + help + Generic implementation of the ECDH algorithm. + config CRYPTO_MANAGER tristate "Cryptographic algorithm manager" select CRYPTO_MANAGER2 @@ -115,6 +137,7 @@ config CRYPTO_MANAGER2 select CRYPTO_HASH2 select CRYPTO_BLKCIPHER2 select CRYPTO_AKCIPHER2 + select CRYPTO_KPP2 config CRYPTO_USER tristate "Userspace cryptographic algorithm configuration" @@ -414,6 +437,17 @@ config CRYPTO_CRC32C_INTEL gain performance compared with software implementation. Module will be crc32c-intel.
+config CRYPTO_CRC32C_VPMSUM + tristate "CRC32c CRC algorithm (powerpc64)" + depends on PPC64 + select CRYPTO_HASH + select CRC32 + help + CRC32c algorithm implemented using vector polynomial multiply-sum + (vpmsum) instructions, introduced in POWER8. Enable on POWER8 + and newer processors for improved performance. + + config CRYPTO_CRC32C_SPARC64 tristate "CRC32c CRC algorithm (SPARC64)" depends on SPARC64 @@ -681,6 +715,38 @@ config CRYPTO_SHA1_MB lanes remain unfilled, a flush operation will be initiated to process the crypto jobs, adding a slight latency. +config CRYPTO_SHA256_MB + tristate "SHA256 digest algorithm (x86_64 Multi-Buffer, Experimental)" + depends on X86 && 64BIT + select CRYPTO_SHA256 + select CRYPTO_HASH + select CRYPTO_MCRYPTD + help + SHA-256 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using the multi-buffer technique. This algorithm computes on + multiple data lanes concurrently with SIMD instructions for + better throughput. It should not be enabled by default but + used when there is a significant amount of work to keep + the data lanes filled to get a performance benefit. If the data + lanes remain unfilled, a flush operation will be initiated to + process the crypto jobs, adding a slight latency. + +config CRYPTO_SHA512_MB + tristate "SHA512 digest algorithm (x86_64 Multi-Buffer, Experimental)" + depends on X86 && 64BIT + select CRYPTO_SHA512 + select CRYPTO_HASH + select CRYPTO_MCRYPTD + help + SHA-512 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using the multi-buffer technique. This algorithm computes on + multiple data lanes concurrently with SIMD instructions for + better throughput. It should not be enabled by default but + used when there is a significant amount of work to keep + the data lanes filled to get a performance benefit. If the data + lanes remain unfilled, a flush operation will be initiated to + process the crypto jobs, adding a slight latency. + config CRYPTO_SHA256 tristate "SHA224 and SHA256 digest algorithm" select CRYPTO_HASH @@ -750,6 +816,16 @@ config CRYPTO_SHA512_SPARC64 SHA-512 secure hash standard (DFIPS 180-2) implemented using sparc64 crypto instructions, when available. +config CRYPTO_SHA3 + tristate "SHA3 digest algorithm" + select CRYPTO_HASH + help + SHA-3 secure hash standard (DFIPS 202). It's based on + the cryptographic sponge function family called Keccak. + + References: + http://keccak.noekeon.org/ + config CRYPTO_TGR192 tristate "Tiger digest algorithms" select CRYPTO_HASH @@ -1567,6 +1643,7 @@ config CRYPTO_DRBG_HASH config CRYPTO_DRBG_CTR bool "Enable CTR DRBG" select CRYPTO_AES + depends on CRYPTO_CTR help Enable the CTR DRBG variant as defined in NIST SP800-90A.
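A quick orientation for the new CRYPTO_SHA3 option: once enabled, the generic transform is reachable through the ordinary shash API. The sketch below is illustrative rather than taken from the patch; it assumes the generic driver registers the algorithm name "sha3-256" and trims error handling to the essentials:

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/types.h>

/* Hedged usage sketch, not from the patch. */
static int sha3_digest_sketch(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	int ret;

	tfm = crypto_alloc_shash("sha3-256", 0, 0);	/* assumed algorithm name */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;	/* no async/backlog flags needed here */
		ret = crypto_shash_digest(desc, data, len, out);
	}

	crypto_free_shash(tfm);
	return ret;
}

The multi-buffer variants added above register asynchronous transforms via mcryptd, so callers wanting those fast paths would go through the ahash interface instead of shash.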
diff --git a/crypto/Makefile b/crypto/Makefile index 4f4ef7eaae3f..99cc64ac70ef 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -20,8 +20,6 @@ crypto_blkcipher-y := ablkcipher.o crypto_blkcipher-y += blkcipher.o crypto_blkcipher-y += skcipher.o obj-$(CONFIG_CRYPTO_BLKCIPHER2) += crypto_blkcipher.o -obj-$(CONFIG_CRYPTO_BLKCIPHER2) += chainiv.o -obj-$(CONFIG_CRYPTO_BLKCIPHER2) += eseqiv.o obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o obj-$(CONFIG_CRYPTO_ECHAINIV) += echainiv.o @@ -30,6 +28,15 @@ crypto_hash-y += shash.o obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o obj-$(CONFIG_CRYPTO_AKCIPHER2) += akcipher.o +obj-$(CONFIG_CRYPTO_KPP2) += kpp.o + +dh_generic-y := dh.o +dh_generic-y += dh_helper.o +obj-$(CONFIG_CRYPTO_DH) += dh_generic.o +ecdh_generic-y := ecc.o +ecdh_generic-y += ecdh.o +ecdh_generic-y += ecdh_helper.o +obj-$(CONFIG_CRYPTO_ECDH) += ecdh_generic.o $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h $(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h @@ -61,6 +68,7 @@ obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o +obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c index e1fcf53bb931..1441f07d0a19 100644 --- a/crypto/ablk_helper.c +++ b/crypto/ablk_helper.c @@ -71,7 +71,8 @@ int ablk_encrypt(struct ablkcipher_request *req) struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); - if (!may_use_simd()) { + if (!may_use_simd() || + (in_atomic() && cryptd_ablkcipher_queued(ctx->cryptd_tfm))) { struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); @@ -90,7 +91,8 @@ int ablk_decrypt(struct ablkcipher_request *req) struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); - if (!may_use_simd()) { + if (!may_use_simd() || + (in_atomic() && cryptd_ablkcipher_queued(ctx->cryptd_tfm))) { struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index e5b5721809e2..d676fc59521a 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -14,11 +14,8 @@ */ #include -#include #include #include -#include -#include #include #include #include @@ -349,16 +346,6 @@ static unsigned int crypto_ablkcipher_ctxsize(struct crypto_alg *alg, u32 type, return alg->cra_ctxsize; } -int skcipher_null_givencrypt(struct skcipher_givcrypt_request *req) -{ - return crypto_ablkcipher_encrypt(&req->creq); -} - -int skcipher_null_givdecrypt(struct skcipher_givcrypt_request *req) -{ - return crypto_ablkcipher_decrypt(&req->creq); -} - static int crypto_init_ablkcipher_ops(struct crypto_tfm *tfm, u32 type, u32 mask) { @@ -371,10 +358,6 @@ static int crypto_init_ablkcipher_ops(struct crypto_tfm *tfm, u32 type, crt->setkey = setkey; crt->encrypt = alg->encrypt; crt->decrypt = alg->decrypt; - if (!alg->ivsize) { - crt->givencrypt = skcipher_null_givencrypt; - crt->givdecrypt = skcipher_null_givdecrypt; - } crt->base = __crypto_ablkcipher_cast(tfm); crt->ivsize = alg->ivsize; @@ -436,11 +419,6 @@ const struct crypto_type crypto_ablkcipher_type = { }; EXPORT_SYMBOL_GPL(crypto_ablkcipher_type); -static int no_givdecrypt(struct skcipher_givcrypt_request *req) -{ - return 
-ENOSYS; -} - static int crypto_init_givcipher_ops(struct crypto_tfm *tfm, u32 type, u32 mask) { @@ -454,8 +432,6 @@ static int crypto_init_givcipher_ops(struct crypto_tfm *tfm, u32 type, alg->setkey : setkey; crt->encrypt = alg->encrypt; crt->decrypt = alg->decrypt; - crt->givencrypt = alg->givencrypt ?: no_givdecrypt; - crt->givdecrypt = alg->givdecrypt ?: no_givdecrypt; crt->base = __crypto_ablkcipher_cast(tfm); crt->ivsize = alg->ivsize; @@ -516,202 +492,3 @@ const struct crypto_type crypto_givcipher_type = { .report = crypto_givcipher_report, }; EXPORT_SYMBOL_GPL(crypto_givcipher_type); - -const char *crypto_default_geniv(const struct crypto_alg *alg) -{ - if (((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_BLKCIPHER ? alg->cra_blkcipher.ivsize : - alg->cra_ablkcipher.ivsize) != - alg->cra_blocksize) - return "chainiv"; - - return "eseqiv"; -} - -static int crypto_givcipher_default(struct crypto_alg *alg, u32 type, u32 mask) -{ - struct rtattr *tb[3]; - struct { - struct rtattr attr; - struct crypto_attr_type data; - } ptype; - struct { - struct rtattr attr; - struct crypto_attr_alg data; - } palg; - struct crypto_template *tmpl; - struct crypto_instance *inst; - struct crypto_alg *larval; - const char *geniv; - int err; - - larval = crypto_larval_lookup(alg->cra_driver_name, - (type & ~CRYPTO_ALG_TYPE_MASK) | - CRYPTO_ALG_TYPE_GIVCIPHER, - mask | CRYPTO_ALG_TYPE_MASK); - err = PTR_ERR(larval); - if (IS_ERR(larval)) - goto out; - - err = -EAGAIN; - if (!crypto_is_larval(larval)) - goto drop_larval; - - ptype.attr.rta_len = sizeof(ptype); - ptype.attr.rta_type = CRYPTOA_TYPE; - ptype.data.type = type | CRYPTO_ALG_GENIV; - /* GENIV tells the template that we're making a default geniv. */ - ptype.data.mask = mask | CRYPTO_ALG_GENIV; - tb[0] = &ptype.attr; - - palg.attr.rta_len = sizeof(palg); - palg.attr.rta_type = CRYPTOA_ALG; - /* Must use the exact name to locate ourselves. */ - memcpy(palg.data.name, alg->cra_driver_name, CRYPTO_MAX_ALG_NAME); - tb[1] = &palg.attr; - - tb[2] = NULL; - - if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_BLKCIPHER) - geniv = alg->cra_blkcipher.geniv; - else - geniv = alg->cra_ablkcipher.geniv; - - if (!geniv) - geniv = crypto_default_geniv(alg); - - tmpl = crypto_lookup_template(geniv); - err = -ENOENT; - if (!tmpl) - goto kill_larval; - - if (tmpl->create) { - err = tmpl->create(tmpl, tb); - if (err) - goto put_tmpl; - goto ok; - } - - inst = tmpl->alloc(tb); - err = PTR_ERR(inst); - if (IS_ERR(inst)) - goto put_tmpl; - - err = crypto_register_instance(tmpl, inst); - if (err) { - tmpl->free(inst); - goto put_tmpl; - } - -ok: - /* Redo the lookup to use the instance we just registered. */ - err = -EAGAIN; - -put_tmpl: - crypto_tmpl_put(tmpl); -kill_larval: - crypto_larval_kill(larval); -drop_larval: - crypto_mod_put(larval); -out: - crypto_mod_put(alg); - return err; -} - -struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask) -{ - struct crypto_alg *alg; - - alg = crypto_alg_mod_lookup(name, type, mask); - if (IS_ERR(alg)) - return alg; - - if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_GIVCIPHER) - return alg; - - if (!((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_BLKCIPHER ? 
alg->cra_blkcipher.ivsize : - alg->cra_ablkcipher.ivsize)) - return alg; - - crypto_mod_put(alg); - alg = crypto_alg_mod_lookup(name, type | CRYPTO_ALG_TESTED, - mask & ~CRYPTO_ALG_TESTED); - if (IS_ERR(alg)) - return alg; - - if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_GIVCIPHER) { - if (~alg->cra_flags & (type ^ ~mask) & CRYPTO_ALG_TESTED) { - crypto_mod_put(alg); - alg = ERR_PTR(-ENOENT); - } - return alg; - } - - BUG_ON(!((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_BLKCIPHER ? alg->cra_blkcipher.ivsize : - alg->cra_ablkcipher.ivsize)); - - return ERR_PTR(crypto_givcipher_default(alg, type, mask)); -} -EXPORT_SYMBOL_GPL(crypto_lookup_skcipher); - -int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, const char *name, - u32 type, u32 mask) -{ - struct crypto_alg *alg; - int err; - - type = crypto_skcipher_type(type); - mask = crypto_skcipher_mask(mask); - - alg = crypto_lookup_skcipher(name, type, mask); - if (IS_ERR(alg)) - return PTR_ERR(alg); - - err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask); - crypto_mod_put(alg); - return err; -} -EXPORT_SYMBOL_GPL(crypto_grab_skcipher); - -struct crypto_ablkcipher *crypto_alloc_ablkcipher(const char *alg_name, - u32 type, u32 mask) -{ - struct crypto_tfm *tfm; - int err; - - type = crypto_skcipher_type(type); - mask = crypto_skcipher_mask(mask); - - for (;;) { - struct crypto_alg *alg; - - alg = crypto_lookup_skcipher(alg_name, type, mask); - if (IS_ERR(alg)) { - err = PTR_ERR(alg); - goto err; - } - - tfm = __crypto_alloc_tfm(alg, type, mask); - if (!IS_ERR(tfm)) - return __crypto_ablkcipher_cast(tfm); - - crypto_mod_put(alg); - err = PTR_ERR(tfm); - -err: - if (err != -EAGAIN) - break; - if (fatal_signal_pending(current)) { - err = -EINTR; - break; - } - } - - return ERR_PTR(err); -} -EXPORT_SYMBOL_GPL(crypto_alloc_ablkcipher); diff --git a/crypto/aead.c b/crypto/aead.c index 9b18a1e40d6a..3f5c5ff004ab 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -294,9 +294,9 @@ int aead_init_geniv(struct crypto_aead *aead) if (err) goto out; - ctx->null = crypto_get_default_null_skcipher(); - err = PTR_ERR(ctx->null); - if (IS_ERR(ctx->null)) + ctx->sknull = crypto_get_default_null_skcipher2(); + err = PTR_ERR(ctx->sknull); + if (IS_ERR(ctx->sknull)) goto out; child = crypto_spawn_aead(aead_instance_ctx(inst)); @@ -314,7 +314,7 @@ out: return err; drop_null: - crypto_put_default_null_skcipher(); + crypto_put_default_null_skcipher2(); goto out; } EXPORT_SYMBOL_GPL(aead_init_geniv); @@ -324,7 +324,7 @@ void aead_exit_geniv(struct crypto_aead *tfm) struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_aead(ctx->child); - crypto_put_default_null_skcipher(); + crypto_put_default_null_skcipher2(); } EXPORT_SYMBOL_GPL(aead_exit_geniv); @@ -346,9 +346,13 @@ static int aead_prepare_alg(struct aead_alg *alg) { struct crypto_alg *base = &alg->base; - if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8) + if (max3(alg->maxauthsize, alg->ivsize, alg->chunksize) > + PAGE_SIZE / 8) return -EINVAL; + if (!alg->chunksize) + alg->chunksize = base->cra_blocksize; + base->cra_type = &crypto_aead_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_AEAD; diff --git a/crypto/ahash.c b/crypto/ahash.c index 3887a98abcc3..2ce8bcb9049c 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -461,10 +461,10 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) static unsigned int crypto_ahash_extsize(struct crypto_alg *alg) { - if (alg->cra_type == &crypto_ahash_type) - 
return alg->cra_ctxsize; + if (alg->cra_type != &crypto_ahash_type) + return sizeof(struct crypto_shash *); - return sizeof(struct crypto_shash *); + return crypto_alg_extsize(alg); } #ifdef CONFIG_NET diff --git a/crypto/algapi.c b/crypto/algapi.c index 731255a6104f..df939b54b09f 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -811,6 +811,21 @@ int crypto_attr_u32(struct rtattr *rta, u32 *num) } EXPORT_SYMBOL_GPL(crypto_attr_u32); +int crypto_inst_setname(struct crypto_instance *inst, const char *name, + struct crypto_alg *alg) +{ + if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", name, + alg->cra_name) >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", + name, alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + return 0; +} +EXPORT_SYMBOL_GPL(crypto_inst_setname); + void *crypto_alloc_instance2(const char *name, struct crypto_alg *alg, unsigned int head) { @@ -825,13 +840,8 @@ void *crypto_alloc_instance2(const char *name, struct crypto_alg *alg, inst = (void *)(p + head); - err = -ENAMETOOLONG; - if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", name, - alg->cra_name) >= CRYPTO_MAX_ALG_NAME) - goto err_free_inst; - - if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", - name, alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + err = crypto_inst_setname(inst, name, alg); + if (err) goto err_free_inst; return p; diff --git a/crypto/asymmetric_keys/mscode_parser.c b/crypto/asymmetric_keys/mscode_parser.c index 6a76d5c70ef6..9492e1c22d38 100644 --- a/crypto/asymmetric_keys/mscode_parser.c +++ b/crypto/asymmetric_keys/mscode_parser.c @@ -124,5 +124,10 @@ int mscode_note_digest(void *context, size_t hdrlen, struct pefile_context *ctx = context; ctx->digest = kmemdup(value, vlen, GFP_KERNEL); - return ctx->digest ? 
0 : -ENOMEM; + if (!ctx->digest) + return -ENOMEM; + + ctx->digest_len = vlen; + + return 0; } diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 44b746e9df1b..2ffd69769466 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -227,7 +227,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7, if (asymmetric_key_id_same(p->id, auth)) goto found_issuer_check_skid; } - } else { + } else if (sig->auth_ids[1]) { auth = sig->auth_ids[1]; pr_debug("- want %*phN\n", auth->len, auth->data); for (p = pkcs7->certs; p; p = p->next) { diff --git a/crypto/asymmetric_keys/restrict.c b/crypto/asymmetric_keys/restrict.c index ac4bddf669de..19d1afb9890f 100644 --- a/crypto/asymmetric_keys/restrict.c +++ b/crypto/asymmetric_keys/restrict.c @@ -87,7 +87,7 @@ int restrict_link_by_signature(struct key *trust_keyring, sig = payload->data[asym_auth]; if (!sig->auth_ids[0] && !sig->auth_ids[1]) - return 0; + return -ENOKEY; if (ca_keyid && !asymmetric_key_id_partial(sig->auth_ids[1], ca_keyid)) return -EPERM; diff --git a/crypto/authenc.c b/crypto/authenc.c index 55a354d57251..a7e1ac786c5d 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -32,8 +32,8 @@ struct authenc_instance_ctx { struct crypto_authenc_ctx { struct crypto_ahash *auth; - struct crypto_ablkcipher *enc; - struct crypto_blkcipher *null; + struct crypto_skcipher *enc; + struct crypto_skcipher *null; }; struct authenc_request_ctx { @@ -83,7 +83,7 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key, { struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct crypto_ahash *auth = ctx->auth; - struct crypto_ablkcipher *enc = ctx->enc; + struct crypto_skcipher *enc = ctx->enc; struct crypto_authenc_keys keys; int err = -EINVAL; @@ -100,11 +100,11 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key, if (err) goto out; - crypto_ablkcipher_clear_flags(enc, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(enc, crypto_aead_get_flags(authenc) & - CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(enc, keys.enckey, keys.enckeylen); - crypto_aead_set_flags(authenc, crypto_ablkcipher_get_flags(enc) & + crypto_skcipher_clear_flags(enc, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(enc, crypto_aead_get_flags(authenc) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(enc, keys.enckey, keys.enckeylen); + crypto_aead_set_flags(authenc, crypto_skcipher_get_flags(enc) & CRYPTO_TFM_RES_MASK); out: @@ -184,12 +184,15 @@ static int crypto_authenc_copy_assoc(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); - struct blkcipher_desc desc = { - .tfm = ctx->null, - }; + SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); - return crypto_blkcipher_encrypt(&desc, req->dst, req->src, - req->assoclen); + skcipher_request_set_tfm(skreq, ctx->null); + skcipher_request_set_callback(skreq, aead_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen, + NULL); + + return crypto_skcipher_encrypt(skreq); } static int crypto_authenc_encrypt(struct aead_request *req) @@ -199,14 +202,13 @@ static int crypto_authenc_encrypt(struct aead_request *req) struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); - struct crypto_ablkcipher *enc = ctx->enc; + 
struct crypto_skcipher *enc = ctx->enc; unsigned int cryptlen = req->cryptlen; - struct ablkcipher_request *abreq = (void *)(areq_ctx->tail + - ictx->reqoff); + struct skcipher_request *skreq = (void *)(areq_ctx->tail + + ictx->reqoff); struct scatterlist *src, *dst; int err; - sg_init_table(areq_ctx->src, 2); src = scatterwalk_ffwd(areq_ctx->src, req->src, req->assoclen); dst = src; @@ -215,16 +217,15 @@ static int crypto_authenc_encrypt(struct aead_request *req) if (err) return err; - sg_init_table(areq_ctx->dst, 2); dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); } - ablkcipher_request_set_tfm(abreq, enc); - ablkcipher_request_set_callback(abreq, aead_request_flags(req), - crypto_authenc_encrypt_done, req); - ablkcipher_request_set_crypt(abreq, src, dst, cryptlen, req->iv); + skcipher_request_set_tfm(skreq, enc); + skcipher_request_set_callback(skreq, aead_request_flags(req), + crypto_authenc_encrypt_done, req); + skcipher_request_set_crypt(skreq, src, dst, cryptlen, req->iv); - err = crypto_ablkcipher_encrypt(abreq); + err = crypto_skcipher_encrypt(skreq); if (err) return err; @@ -240,8 +241,8 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req, struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ictx->reqoff); - struct ablkcipher_request *abreq = (void *)(areq_ctx->tail + - ictx->reqoff); + struct skcipher_request *skreq = (void *)(areq_ctx->tail + + ictx->reqoff); unsigned int authsize = crypto_aead_authsize(authenc); u8 *ihash = ahreq->result + authsize; struct scatterlist *src, *dst; @@ -251,22 +252,19 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req, if (crypto_memneq(ihash, ahreq->result, authsize)) return -EBADMSG; - sg_init_table(areq_ctx->src, 2); src = scatterwalk_ffwd(areq_ctx->src, req->src, req->assoclen); dst = src; - if (req->src != req->dst) { - sg_init_table(areq_ctx->dst, 2); + if (req->src != req->dst) dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); - } - ablkcipher_request_set_tfm(abreq, ctx->enc); - ablkcipher_request_set_callback(abreq, aead_request_flags(req), - req->base.complete, req->base.data); - ablkcipher_request_set_crypt(abreq, src, dst, - req->cryptlen - authsize, req->iv); + skcipher_request_set_tfm(skreq, ctx->enc); + skcipher_request_set_callback(skreq, aead_request_flags(req), + req->base.complete, req->base.data); + skcipher_request_set_crypt(skreq, src, dst, + req->cryptlen - authsize, req->iv); - return crypto_ablkcipher_decrypt(abreq); + return crypto_skcipher_decrypt(skreq); } static void authenc_verify_ahash_done(struct crypto_async_request *areq, @@ -318,20 +316,20 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm) struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_ahash *auth; - struct crypto_ablkcipher *enc; - struct crypto_blkcipher *null; + struct crypto_skcipher *enc; + struct crypto_skcipher *null; int err; auth = crypto_spawn_ahash(&ictx->auth); if (IS_ERR(auth)) return PTR_ERR(auth); - enc = crypto_spawn_skcipher(&ictx->enc); + enc = crypto_spawn_skcipher2(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; - null = crypto_get_default_null_skcipher(); + null = crypto_get_default_null_skcipher2(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_skcipher; @@ -347,13 +345,13 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm) 
max_t(unsigned int, crypto_ahash_reqsize(auth) + sizeof(struct ahash_request), - sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(enc))); + sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(enc))); return 0; err_free_skcipher: - crypto_free_ablkcipher(enc); + crypto_free_skcipher(enc); err_free_ahash: crypto_free_ahash(auth); return err; @@ -364,8 +362,8 @@ static void crypto_authenc_exit_tfm(struct crypto_aead *tfm) struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_ahash(ctx->auth); - crypto_free_ablkcipher(ctx->enc); - crypto_put_default_null_skcipher(); + crypto_free_skcipher(ctx->enc); + crypto_put_default_null_skcipher2(); } static void crypto_authenc_free(struct aead_instance *inst) @@ -384,7 +382,7 @@ static int crypto_authenc_create(struct crypto_template *tmpl, struct aead_instance *inst; struct hash_alg_common *auth; struct crypto_alg *auth_base; - struct crypto_alg *enc; + struct skcipher_alg *enc; struct authenc_instance_ctx *ctx; const char *enc_name; int err; @@ -397,7 +395,8 @@ static int crypto_authenc_create(struct crypto_template *tmpl, return -EINVAL; auth = ahash_attr_alg(tb[1], CRYPTO_ALG_TYPE_HASH, - CRYPTO_ALG_TYPE_AHASH_MASK); + CRYPTO_ALG_TYPE_AHASH_MASK | + crypto_requires_sync(algt->type, algt->mask)); if (IS_ERR(auth)) return PTR_ERR(auth); @@ -421,37 +420,40 @@ static int crypto_authenc_create(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst)); - err = crypto_grab_skcipher(&ctx->enc, enc_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_auth; - enc = crypto_skcipher_spawn_alg(&ctx->enc); + enc = crypto_spawn_skcipher_alg(&ctx->enc); ctx->reqoff = ALIGN(2 * auth->digestsize + auth_base->cra_alignmask, auth_base->cra_alignmask + 1); err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, - "authenc(%s,%s)", auth_base->cra_name, enc->cra_name) >= + "authenc(%s,%s)", auth_base->cra_name, + enc->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_enc; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", auth_base->cra_driver_name, - enc->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + enc->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_enc; - inst->alg.base.cra_flags = enc->cra_flags & CRYPTO_ALG_ASYNC; - inst->alg.base.cra_priority = enc->cra_priority * 10 + + inst->alg.base.cra_flags = (auth_base->cra_flags | + enc->base.cra_flags) & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_priority = enc->base.cra_priority * 10 + auth_base->cra_priority; - inst->alg.base.cra_blocksize = enc->cra_blocksize; + inst->alg.base.cra_blocksize = enc->base.cra_blocksize; inst->alg.base.cra_alignmask = auth_base->cra_alignmask | - enc->cra_alignmask; + enc->base.cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct crypto_authenc_ctx); - inst->alg.ivsize = enc->cra_ablkcipher.ivsize; + inst->alg.ivsize = crypto_skcipher_alg_ivsize(enc); + inst->alg.chunksize = crypto_skcipher_alg_chunksize(enc); inst->alg.maxauthsize = auth->digestsize; inst->alg.init = crypto_authenc_init_tfm; diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 0c0468869e25..121010ac9962 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -35,8 +35,8 @@ struct authenc_esn_instance_ctx { struct crypto_authenc_esn_ctx { unsigned int reqoff; struct crypto_ahash *auth; - struct crypto_ablkcipher *enc; 
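/*
 * For orientation, the calling convention these conversions move to, shown
 * outside the template code. This is an illustrative, self-contained sketch
 * (not part of the patch; the function name is hypothetical): it assumes a
 * synchronous "ecb(cipher_null)" transform so crypto_skcipher_encrypt()
 * completes inline, and keeps error handling minimal.
 */
#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int skcipher_request_sketch(void)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	u8 buf[16] = { 0 };
	int err;

	/* CRYPTO_ALG_ASYNC in the mask excludes async implementations. */
	tfm = crypto_alloc_skcipher("ecb(cipher_null)", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_skcipher(tfm);
		return -ENOMEM;
	}

	sg_init_one(&sg, buf, sizeof(buf));
	skcipher_request_set_callback(req, 0, NULL, NULL);
	/* ecb(cipher_null) has a zero ivsize, hence the NULL iv. */
	skcipher_request_set_crypt(req, &sg, &sg, sizeof(buf), NULL);

	err = crypto_skcipher_encrypt(req);

	skcipher_request_free(req);
	crypto_free_skcipher(tfm);
	return err;
}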
- struct crypto_blkcipher *null; + struct crypto_skcipher *enc; + struct crypto_skcipher *null; }; struct authenc_esn_request_ctx { @@ -65,7 +65,7 @@ static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 * { struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); struct crypto_ahash *auth = ctx->auth; - struct crypto_ablkcipher *enc = ctx->enc; + struct crypto_skcipher *enc = ctx->enc; struct crypto_authenc_keys keys; int err = -EINVAL; @@ -82,11 +82,11 @@ static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 * if (err) goto out; - crypto_ablkcipher_clear_flags(enc, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(enc, crypto_aead_get_flags(authenc_esn) & + crypto_skcipher_clear_flags(enc, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(enc, crypto_aead_get_flags(authenc_esn) & CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(enc, keys.enckey, keys.enckeylen); - crypto_aead_set_flags(authenc_esn, crypto_ablkcipher_get_flags(enc) & + err = crypto_skcipher_setkey(enc, keys.enckey, keys.enckeylen); + crypto_aead_set_flags(authenc_esn, crypto_skcipher_get_flags(enc) & CRYPTO_TFM_RES_MASK); out: @@ -182,11 +182,14 @@ static int crypto_authenc_esn_copy(struct aead_request *req, unsigned int len) { struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); - struct blkcipher_desc desc = { - .tfm = ctx->null, - }; + SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); - return crypto_blkcipher_encrypt(&desc, req->dst, req->src, len); + skcipher_request_set_tfm(skreq, ctx->null); + skcipher_request_set_callback(skreq, aead_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(skreq, req->src, req->dst, len, NULL); + + return crypto_skcipher_encrypt(skreq); } static int crypto_authenc_esn_encrypt(struct aead_request *req) @@ -194,9 +197,9 @@ static int crypto_authenc_esn_encrypt(struct aead_request *req) struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req); struct authenc_esn_request_ctx *areq_ctx = aead_request_ctx(req); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); - struct ablkcipher_request *abreq = (void *)(areq_ctx->tail - + ctx->reqoff); - struct crypto_ablkcipher *enc = ctx->enc; + struct skcipher_request *skreq = (void *)(areq_ctx->tail + + ctx->reqoff); + struct crypto_skcipher *enc = ctx->enc; unsigned int assoclen = req->assoclen; unsigned int cryptlen = req->cryptlen; struct scatterlist *src, *dst; @@ -215,12 +218,12 @@ static int crypto_authenc_esn_encrypt(struct aead_request *req) dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, assoclen); } - ablkcipher_request_set_tfm(abreq, enc); - ablkcipher_request_set_callback(abreq, aead_request_flags(req), - crypto_authenc_esn_encrypt_done, req); - ablkcipher_request_set_crypt(abreq, src, dst, cryptlen, req->iv); + skcipher_request_set_tfm(skreq, enc); + skcipher_request_set_callback(skreq, aead_request_flags(req), + crypto_authenc_esn_encrypt_done, req); + skcipher_request_set_crypt(skreq, src, dst, cryptlen, req->iv); - err = crypto_ablkcipher_encrypt(abreq); + err = crypto_skcipher_encrypt(skreq); if (err) return err; @@ -234,8 +237,8 @@ static int crypto_authenc_esn_decrypt_tail(struct aead_request *req, unsigned int authsize = crypto_aead_authsize(authenc_esn); struct authenc_esn_request_ctx *areq_ctx = aead_request_ctx(req); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); - struct ablkcipher_request *abreq = (void *)(areq_ctx->tail - + ctx->reqoff); + 
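/*
 * The SKCIPHER_REQUEST_ON_STACK() pattern that crypto_authenc_esn_copy()
 * above now uses avoids a heap allocation for a short-lived request. A
 * minimal sketch (illustrative, not part of the patch; the function name is
 * hypothetical), assuming "tfm" is a synchronous transform such as the
 * default null skcipher -- on-stack requests must not be handed to
 * transforms that complete asynchronously:
 */
#include <crypto/skcipher.h>
#include <linux/scatterlist.h>

static int null_copy_sketch(struct crypto_skcipher *tfm,
			    struct scatterlist *src,
			    struct scatterlist *dst, unsigned int len)
{
	/* Reserves sizeof(request) + crypto_skcipher_reqsize(tfm) on the stack. */
	SKCIPHER_REQUEST_ON_STACK(req, tfm);

	skcipher_request_set_tfm(req, tfm);
	/* No completion callback: only valid for synchronous transforms. */
	skcipher_request_set_callback(req, 0, NULL, NULL);
	skcipher_request_set_crypt(req, src, dst, len, NULL);

	return crypto_skcipher_encrypt(req);
}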
struct skcipher_request *skreq = (void *)(areq_ctx->tail + + ctx->reqoff); struct crypto_ahash *auth = ctx->auth; u8 *ohash = PTR_ALIGN((u8 *)areq_ctx->tail, crypto_ahash_alignmask(auth) + 1); @@ -256,12 +259,12 @@ static int crypto_authenc_esn_decrypt_tail(struct aead_request *req, sg_init_table(areq_ctx->dst, 2); dst = scatterwalk_ffwd(areq_ctx->dst, dst, assoclen); - ablkcipher_request_set_tfm(abreq, ctx->enc); - ablkcipher_request_set_callback(abreq, flags, - req->base.complete, req->base.data); - ablkcipher_request_set_crypt(abreq, dst, dst, cryptlen, req->iv); + skcipher_request_set_tfm(skreq, ctx->enc); + skcipher_request_set_callback(skreq, flags, + req->base.complete, req->base.data); + skcipher_request_set_crypt(skreq, dst, dst, cryptlen, req->iv); - return crypto_ablkcipher_decrypt(abreq); + return crypto_skcipher_decrypt(skreq); } static void authenc_esn_verify_ahash_done(struct crypto_async_request *areq, @@ -331,20 +334,20 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm) struct authenc_esn_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_ahash *auth; - struct crypto_ablkcipher *enc; - struct crypto_blkcipher *null; + struct crypto_skcipher *enc; + struct crypto_skcipher *null; int err; auth = crypto_spawn_ahash(&ictx->auth); if (IS_ERR(auth)) return PTR_ERR(auth); - enc = crypto_spawn_skcipher(&ictx->enc); + enc = crypto_spawn_skcipher2(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; - null = crypto_get_default_null_skcipher(); + null = crypto_get_default_null_skcipher2(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_skcipher; @@ -361,15 +364,15 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm) sizeof(struct authenc_esn_request_ctx) + ctx->reqoff + max_t(unsigned int, - crypto_ahash_reqsize(auth) + - sizeof(struct ahash_request), - sizeof(struct skcipher_givcrypt_request) + - crypto_ablkcipher_reqsize(enc))); + crypto_ahash_reqsize(auth) + + sizeof(struct ahash_request), + sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(enc))); return 0; err_free_skcipher: - crypto_free_ablkcipher(enc); + crypto_free_skcipher(enc); err_free_ahash: crypto_free_ahash(auth); return err; @@ -380,8 +383,8 @@ static void crypto_authenc_esn_exit_tfm(struct crypto_aead *tfm) struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_ahash(ctx->auth); - crypto_free_ablkcipher(ctx->enc); - crypto_put_default_null_skcipher(); + crypto_free_skcipher(ctx->enc); + crypto_put_default_null_skcipher2(); } static void crypto_authenc_esn_free(struct aead_instance *inst) @@ -400,7 +403,7 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, struct aead_instance *inst; struct hash_alg_common *auth; struct crypto_alg *auth_base; - struct crypto_alg *enc; + struct skcipher_alg *enc; struct authenc_esn_instance_ctx *ctx; const char *enc_name; int err; @@ -413,7 +416,8 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, return -EINVAL; auth = ahash_attr_alg(tb[1], CRYPTO_ALG_TYPE_HASH, - CRYPTO_ALG_TYPE_AHASH_MASK); + CRYPTO_ALG_TYPE_AHASH_MASK | + crypto_requires_sync(algt->type, algt->mask)); if (IS_ERR(auth)) return PTR_ERR(auth); @@ -437,34 +441,36 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst)); - err = crypto_grab_skcipher(&ctx->enc, enc_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = 
crypto_grab_skcipher2(&ctx->enc, enc_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_auth; - enc = crypto_skcipher_spawn_alg(&ctx->enc); + enc = crypto_spawn_skcipher_alg(&ctx->enc); err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "authencesn(%s,%s)", auth_base->cra_name, - enc->cra_name) >= CRYPTO_MAX_ALG_NAME) + enc->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_enc; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "authencesn(%s,%s)", auth_base->cra_driver_name, - enc->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + enc->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_enc; - inst->alg.base.cra_flags = enc->cra_flags & CRYPTO_ALG_ASYNC; - inst->alg.base.cra_priority = enc->cra_priority * 10 + + inst->alg.base.cra_flags = (auth_base->cra_flags | + enc->base.cra_flags) & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_priority = enc->base.cra_priority * 10 + auth_base->cra_priority; - inst->alg.base.cra_blocksize = enc->cra_blocksize; + inst->alg.base.cra_blocksize = enc->base.cra_blocksize; inst->alg.base.cra_alignmask = auth_base->cra_alignmask | - enc->cra_alignmask; + enc->base.cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct crypto_authenc_esn_ctx); - inst->alg.ivsize = enc->cra_ablkcipher.ivsize; + inst->alg.ivsize = crypto_skcipher_alg_ivsize(enc); + inst->alg.chunksize = crypto_skcipher_alg_chunksize(enc); inst->alg.maxauthsize = auth->digestsize; inst->alg.init = crypto_authenc_esn_init_tfm; diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 8cc1622b2ee0..369999530108 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -466,10 +465,6 @@ static int crypto_init_blkcipher_ops_async(struct crypto_tfm *tfm) crt->setkey = async_setkey; crt->encrypt = async_encrypt; crt->decrypt = async_decrypt; - if (!alg->ivsize) { - crt->givencrypt = skcipher_null_givencrypt; - crt->givdecrypt = skcipher_null_givdecrypt; - } crt->base = __crypto_ablkcipher_cast(tfm); crt->ivsize = alg->ivsize; @@ -560,185 +555,5 @@ const struct crypto_type crypto_blkcipher_type = { }; EXPORT_SYMBOL_GPL(crypto_blkcipher_type); -static int crypto_grab_nivcipher(struct crypto_skcipher_spawn *spawn, - const char *name, u32 type, u32 mask) -{ - struct crypto_alg *alg; - int err; - - type = crypto_skcipher_type(type); - mask = crypto_skcipher_mask(mask)| CRYPTO_ALG_GENIV; - - alg = crypto_alg_mod_lookup(name, type, mask); - if (IS_ERR(alg)) - return PTR_ERR(alg); - - err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask); - crypto_mod_put(alg); - return err; -} - -struct crypto_instance *skcipher_geniv_alloc(struct crypto_template *tmpl, - struct rtattr **tb, u32 type, - u32 mask) -{ - struct { - int (*setkey)(struct crypto_ablkcipher *tfm, const u8 *key, - unsigned int keylen); - int (*encrypt)(struct ablkcipher_request *req); - int (*decrypt)(struct ablkcipher_request *req); - - unsigned int min_keysize; - unsigned int max_keysize; - unsigned int ivsize; - - const char *geniv; - } balg; - const char *name; - struct crypto_skcipher_spawn *spawn; - struct crypto_attr_type *algt; - struct crypto_instance *inst; - struct crypto_alg *alg; - int err; - - algt = crypto_get_attr_type(tb); - if (IS_ERR(algt)) - return ERR_CAST(algt); - - if ((algt->type ^ (CRYPTO_ALG_TYPE_GIVCIPHER | CRYPTO_ALG_GENIV)) & - algt->mask) - return ERR_PTR(-EINVAL); - - name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(name)) - return 
ERR_CAST(name); - - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); - if (!inst) - return ERR_PTR(-ENOMEM); - - spawn = crypto_instance_ctx(inst); - - /* Ignore async algorithms if necessary. */ - mask |= crypto_requires_sync(algt->type, algt->mask); - - crypto_set_skcipher_spawn(spawn, inst); - err = crypto_grab_nivcipher(spawn, name, type, mask); - if (err) - goto err_free_inst; - - alg = crypto_skcipher_spawn_alg(spawn); - - if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_BLKCIPHER) { - balg.ivsize = alg->cra_blkcipher.ivsize; - balg.min_keysize = alg->cra_blkcipher.min_keysize; - balg.max_keysize = alg->cra_blkcipher.max_keysize; - - balg.setkey = async_setkey; - balg.encrypt = async_encrypt; - balg.decrypt = async_decrypt; - - balg.geniv = alg->cra_blkcipher.geniv; - } else { - balg.ivsize = alg->cra_ablkcipher.ivsize; - balg.min_keysize = alg->cra_ablkcipher.min_keysize; - balg.max_keysize = alg->cra_ablkcipher.max_keysize; - - balg.setkey = alg->cra_ablkcipher.setkey; - balg.encrypt = alg->cra_ablkcipher.encrypt; - balg.decrypt = alg->cra_ablkcipher.decrypt; - - balg.geniv = alg->cra_ablkcipher.geniv; - } - - err = -EINVAL; - if (!balg.ivsize) - goto err_drop_alg; - - /* - * This is only true if we're constructing an algorithm with its - * default IV generator. For the default generator we elide the - * template name and double-check the IV generator. - */ - if (algt->mask & CRYPTO_ALG_GENIV) { - if (!balg.geniv) - balg.geniv = crypto_default_geniv(alg); - err = -EAGAIN; - if (strcmp(tmpl->name, balg.geniv)) - goto err_drop_alg; - - memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); - memcpy(inst->alg.cra_driver_name, alg->cra_driver_name, - CRYPTO_MAX_ALG_NAME); - } else { - err = -ENAMETOOLONG; - if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, - "%s(%s)", tmpl->name, alg->cra_name) >= - CRYPTO_MAX_ALG_NAME) - goto err_drop_alg; - if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "%s(%s)", tmpl->name, alg->cra_driver_name) >= - CRYPTO_MAX_ALG_NAME) - goto err_drop_alg; - } - - inst->alg.cra_flags = CRYPTO_ALG_TYPE_GIVCIPHER | CRYPTO_ALG_GENIV; - inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = &crypto_givcipher_type; - - inst->alg.cra_ablkcipher.ivsize = balg.ivsize; - inst->alg.cra_ablkcipher.min_keysize = balg.min_keysize; - inst->alg.cra_ablkcipher.max_keysize = balg.max_keysize; - inst->alg.cra_ablkcipher.geniv = balg.geniv; - - inst->alg.cra_ablkcipher.setkey = balg.setkey; - inst->alg.cra_ablkcipher.encrypt = balg.encrypt; - inst->alg.cra_ablkcipher.decrypt = balg.decrypt; - -out: - return inst; - -err_drop_alg: - crypto_drop_skcipher(spawn); -err_free_inst: - kfree(inst); - inst = ERR_PTR(err); - goto out; -} -EXPORT_SYMBOL_GPL(skcipher_geniv_alloc); - -void skcipher_geniv_free(struct crypto_instance *inst) -{ - crypto_drop_skcipher(crypto_instance_ctx(inst)); - kfree(inst); -} -EXPORT_SYMBOL_GPL(skcipher_geniv_free); - -int skcipher_geniv_init(struct crypto_tfm *tfm) -{ - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_ablkcipher *cipher; - - cipher = crypto_spawn_skcipher(crypto_instance_ctx(inst)); - if (IS_ERR(cipher)) - return PTR_ERR(cipher); - - tfm->crt_ablkcipher.base = cipher; - tfm->crt_ablkcipher.reqsize += crypto_ablkcipher_reqsize(cipher); - - return 0; -} -EXPORT_SYMBOL_GPL(skcipher_geniv_init); - -void 
skcipher_geniv_exit(struct crypto_tfm *tfm) -{ - crypto_free_ablkcipher(tfm->crt_ablkcipher.base); -} -EXPORT_SYMBOL_GPL(skcipher_geniv_exit); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Generic block chaining cipher type"); diff --git a/crypto/ccm.c b/crypto/ccm.c index cc31ea4335bf..006d8575ef5c 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -28,7 +28,7 @@ struct ccm_instance_ctx { struct crypto_ccm_ctx { struct crypto_cipher *cipher; - struct crypto_ablkcipher *ctr; + struct crypto_skcipher *ctr; }; struct crypto_rfc4309_ctx { @@ -50,7 +50,7 @@ struct crypto_ccm_req_priv_ctx { u32 flags; struct scatterlist src[3]; struct scatterlist dst[3]; - struct ablkcipher_request abreq; + struct skcipher_request skreq; }; static inline struct crypto_ccm_req_priv_ctx *crypto_ccm_reqctx( @@ -83,15 +83,15 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead); - struct crypto_ablkcipher *ctr = ctx->ctr; + struct crypto_skcipher *ctr = ctx->ctr; struct crypto_cipher *tfm = ctx->cipher; int err = 0; - crypto_ablkcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(ctr, crypto_aead_get_flags(aead) & - CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(ctr, key, keylen); - crypto_aead_set_flags(aead, crypto_ablkcipher_get_flags(ctr) & + crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(ctr, key, keylen); + crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctr) & CRYPTO_TFM_RES_MASK); if (err) goto out; @@ -347,7 +347,7 @@ static int crypto_ccm_encrypt(struct aead_request *req) struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead); struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req); - struct ablkcipher_request *abreq = &pctx->abreq; + struct skcipher_request *skreq = &pctx->skreq; struct scatterlist *dst; unsigned int cryptlen = req->cryptlen; u8 *odata = pctx->odata; @@ -366,11 +366,11 @@ static int crypto_ccm_encrypt(struct aead_request *req) if (req->src != req->dst) dst = pctx->dst; - ablkcipher_request_set_tfm(abreq, ctx->ctr); - ablkcipher_request_set_callback(abreq, pctx->flags, - crypto_ccm_encrypt_done, req); - ablkcipher_request_set_crypt(abreq, pctx->src, dst, cryptlen + 16, iv); - err = crypto_ablkcipher_encrypt(abreq); + skcipher_request_set_tfm(skreq, ctx->ctr); + skcipher_request_set_callback(skreq, pctx->flags, + crypto_ccm_encrypt_done, req); + skcipher_request_set_crypt(skreq, pctx->src, dst, cryptlen + 16, iv); + err = crypto_skcipher_encrypt(skreq); if (err) return err; @@ -407,7 +407,7 @@ static int crypto_ccm_decrypt(struct aead_request *req) struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead); struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req); - struct ablkcipher_request *abreq = &pctx->abreq; + struct skcipher_request *skreq = &pctx->skreq; struct scatterlist *dst; unsigned int authsize = crypto_aead_authsize(aead); unsigned int cryptlen = req->cryptlen; @@ -429,11 +429,11 @@ static int crypto_ccm_decrypt(struct aead_request *req) if (req->src != req->dst) dst = pctx->dst; - ablkcipher_request_set_tfm(abreq, ctx->ctr); - ablkcipher_request_set_callback(abreq, pctx->flags, - crypto_ccm_decrypt_done, req); - ablkcipher_request_set_crypt(abreq, pctx->src, dst, cryptlen + 16, iv); - err = crypto_ablkcipher_decrypt(abreq); 
+ skcipher_request_set_tfm(skreq, ctx->ctr); + skcipher_request_set_callback(skreq, pctx->flags, + crypto_ccm_decrypt_done, req); + skcipher_request_set_crypt(skreq, pctx->src, dst, cryptlen + 16, iv); + err = crypto_skcipher_decrypt(skreq); if (err) return err; @@ -454,7 +454,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm) struct ccm_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_cipher *cipher; - struct crypto_ablkcipher *ctr; + struct crypto_skcipher *ctr; unsigned long align; int err; @@ -462,7 +462,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm) if (IS_ERR(cipher)) return PTR_ERR(cipher); - ctr = crypto_spawn_skcipher(&ictx->ctr); + ctr = crypto_spawn_skcipher2(&ictx->ctr); err = PTR_ERR(ctr); if (IS_ERR(ctr)) goto err_free_cipher; @@ -475,7 +475,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm) crypto_aead_set_reqsize( tfm, align + sizeof(struct crypto_ccm_req_priv_ctx) + - crypto_ablkcipher_reqsize(ctr)); + crypto_skcipher_reqsize(ctr)); return 0; @@ -489,7 +489,7 @@ static void crypto_ccm_exit_tfm(struct crypto_aead *tfm) struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_cipher(ctx->cipher); - crypto_free_ablkcipher(ctx->ctr); + crypto_free_skcipher(ctx->ctr); } static void crypto_ccm_free(struct aead_instance *inst) @@ -509,7 +509,7 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, { struct crypto_attr_type *algt; struct aead_instance *inst; - struct crypto_alg *ctr; + struct skcipher_alg *ctr; struct crypto_alg *cipher; struct ccm_instance_ctx *ictx; int err; @@ -544,39 +544,40 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ictx->ctr, aead_crypto_instance(inst)); - err = crypto_grab_skcipher(&ictx->ctr, ctr_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher2(&ictx->ctr, ctr_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_cipher; - ctr = crypto_skcipher_spawn_alg(&ictx->ctr); + ctr = crypto_spawn_skcipher_alg(&ictx->ctr); /* Not a stream cipher? */ err = -EINVAL; - if (ctr->cra_blocksize != 1) + if (ctr->base.cra_blocksize != 1) goto err_drop_ctr; /* We want the real thing! 
*/ - if (ctr->cra_ablkcipher.ivsize != 16) + if (crypto_skcipher_alg_ivsize(ctr) != 16) goto err_drop_ctr; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "ccm_base(%s,%s)", ctr->cra_driver_name, + "ccm_base(%s,%s)", ctr->base.cra_driver_name, cipher->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_ctr; memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME); - inst->alg.base.cra_flags = ctr->cra_flags & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_flags = ctr->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = (cipher->cra_priority + - ctr->cra_priority) / 2; + ctr->base.cra_priority) / 2; inst->alg.base.cra_blocksize = 1; inst->alg.base.cra_alignmask = cipher->cra_alignmask | - ctr->cra_alignmask | + ctr->base.cra_alignmask | (__alignof__(u32) - 1); inst->alg.ivsize = 16; + inst->alg.chunksize = crypto_skcipher_alg_chunksize(ctr); inst->alg.maxauthsize = 16; inst->alg.base.cra_ctxsize = sizeof(struct crypto_ccm_ctx); inst->alg.init = crypto_ccm_init_tfm; @@ -863,6 +864,7 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl, inst->alg.base.cra_alignmask = alg->base.cra_alignmask; inst->alg.ivsize = 8; + inst->alg.chunksize = crypto_aead_alg_chunksize(alg); inst->alg.maxauthsize = 16; inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc4309_ctx); diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index 7b6b935cef23..e899ef51dc8e 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -31,7 +31,7 @@ struct chachapoly_instance_ctx { }; struct chachapoly_ctx { - struct crypto_ablkcipher *chacha; + struct crypto_skcipher *chacha; struct crypto_ahash *poly; /* key bytes we use for the ChaCha20 IV */ unsigned int saltlen; @@ -53,7 +53,7 @@ struct poly_req { struct chacha_req { u8 iv[CHACHA20_IV_SIZE]; struct scatterlist src[1]; - struct ablkcipher_request req; /* must be last member */ + struct skcipher_request req; /* must be last member */ }; struct chachapoly_req_ctx { @@ -144,12 +144,12 @@ static int chacha_decrypt(struct aead_request *req) dst = scatterwalk_ffwd(rctx->dst, req->dst, req->assoclen); } - ablkcipher_request_set_callback(&creq->req, aead_request_flags(req), - chacha_decrypt_done, req); - ablkcipher_request_set_tfm(&creq->req, ctx->chacha); - ablkcipher_request_set_crypt(&creq->req, src, dst, - rctx->cryptlen, creq->iv); - err = crypto_ablkcipher_decrypt(&creq->req); + skcipher_request_set_callback(&creq->req, aead_request_flags(req), + chacha_decrypt_done, req); + skcipher_request_set_tfm(&creq->req, ctx->chacha); + skcipher_request_set_crypt(&creq->req, src, dst, + rctx->cryptlen, creq->iv); + err = crypto_skcipher_decrypt(&creq->req); if (err) return err; @@ -393,13 +393,13 @@ static int poly_genkey(struct aead_request *req) chacha_iv(creq->iv, req, 0); - ablkcipher_request_set_callback(&creq->req, aead_request_flags(req), - poly_genkey_done, req); - ablkcipher_request_set_tfm(&creq->req, ctx->chacha); - ablkcipher_request_set_crypt(&creq->req, creq->src, creq->src, - POLY1305_KEY_SIZE, creq->iv); + skcipher_request_set_callback(&creq->req, aead_request_flags(req), + poly_genkey_done, req); + skcipher_request_set_tfm(&creq->req, ctx->chacha); + skcipher_request_set_crypt(&creq->req, creq->src, creq->src, + POLY1305_KEY_SIZE, creq->iv); - err = crypto_ablkcipher_decrypt(&creq->req); + err = crypto_skcipher_decrypt(&creq->req); if (err) return err; @@ -433,12 +433,12 @@ static int chacha_encrypt(struct aead_request *req) dst = scatterwalk_ffwd(rctx->dst, req->dst, 
req->assoclen); } - ablkcipher_request_set_callback(&creq->req, aead_request_flags(req), - chacha_encrypt_done, req); - ablkcipher_request_set_tfm(&creq->req, ctx->chacha); - ablkcipher_request_set_crypt(&creq->req, src, dst, - req->cryptlen, creq->iv); - err = crypto_ablkcipher_encrypt(&creq->req); + skcipher_request_set_callback(&creq->req, aead_request_flags(req), + chacha_encrypt_done, req); + skcipher_request_set_tfm(&creq->req, ctx->chacha); + skcipher_request_set_crypt(&creq->req, src, dst, + req->cryptlen, creq->iv); + err = crypto_skcipher_encrypt(&creq->req); if (err) return err; @@ -500,13 +500,13 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, keylen -= ctx->saltlen; memcpy(ctx->salt, key + keylen, ctx->saltlen); - crypto_ablkcipher_clear_flags(ctx->chacha, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(ctx->chacha, crypto_aead_get_flags(aead) & - CRYPTO_TFM_REQ_MASK); + crypto_skcipher_clear_flags(ctx->chacha, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->chacha, crypto_aead_get_flags(aead) & + CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(ctx->chacha, key, keylen); - crypto_aead_set_flags(aead, crypto_ablkcipher_get_flags(ctx->chacha) & - CRYPTO_TFM_RES_MASK); + err = crypto_skcipher_setkey(ctx->chacha, key, keylen); + crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctx->chacha) & + CRYPTO_TFM_RES_MASK); return err; } @@ -524,7 +524,7 @@ static int chachapoly_init(struct crypto_aead *tfm) struct aead_instance *inst = aead_alg_instance(tfm); struct chachapoly_instance_ctx *ictx = aead_instance_ctx(inst); struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm); - struct crypto_ablkcipher *chacha; + struct crypto_skcipher *chacha; struct crypto_ahash *poly; unsigned long align; @@ -532,7 +532,7 @@ static int chachapoly_init(struct crypto_aead *tfm) if (IS_ERR(poly)) return PTR_ERR(poly); - chacha = crypto_spawn_skcipher(&ictx->chacha); + chacha = crypto_spawn_skcipher2(&ictx->chacha); if (IS_ERR(chacha)) { crypto_free_ahash(poly); return PTR_ERR(chacha); @@ -548,8 +548,8 @@ static int chachapoly_init(struct crypto_aead *tfm) tfm, align + offsetof(struct chachapoly_req_ctx, u) + max(offsetof(struct chacha_req, req) + - sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(chacha), + sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(chacha), offsetof(struct poly_req, req) + sizeof(struct ahash_request) + crypto_ahash_reqsize(poly))); @@ -562,7 +562,7 @@ static void chachapoly_exit(struct crypto_aead *tfm) struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_ahash(ctx->poly); - crypto_free_ablkcipher(ctx->chacha); + crypto_free_skcipher(ctx->chacha); } static void chachapoly_free(struct aead_instance *inst) @@ -579,7 +579,7 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, { struct crypto_attr_type *algt; struct aead_instance *inst; - struct crypto_alg *chacha; + struct skcipher_alg *chacha; struct crypto_alg *poly; struct hash_alg_common *poly_hash; struct chachapoly_instance_ctx *ctx; @@ -605,7 +605,9 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, poly = crypto_find_alg(poly_name, &crypto_ahash_type, CRYPTO_ALG_TYPE_HASH, - CRYPTO_ALG_TYPE_AHASH_MASK); + CRYPTO_ALG_TYPE_AHASH_MASK | + crypto_requires_sync(algt->type, + algt->mask)); if (IS_ERR(poly)) return PTR_ERR(poly); @@ -623,20 +625,20 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->chacha, 
aead_crypto_instance(inst)); - err = crypto_grab_skcipher(&ctx->chacha, chacha_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher2(&ctx->chacha, chacha_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_poly; - chacha = crypto_skcipher_spawn_alg(&ctx->chacha); + chacha = crypto_spawn_skcipher_alg(&ctx->chacha); err = -EINVAL; /* Need 16-byte IV size, including Initial Block Counter value */ - if (chacha->cra_ablkcipher.ivsize != CHACHA20_IV_SIZE) + if (crypto_skcipher_alg_ivsize(chacha) != CHACHA20_IV_SIZE) goto out_drop_chacha; /* Not a stream cipher? */ - if (chacha->cra_blocksize != 1) + if (chacha->base.cra_blocksize != 1) goto out_drop_chacha; err = -ENAMETOOLONG; @@ -645,20 +647,21 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, poly_name) >= CRYPTO_MAX_ALG_NAME) goto out_drop_chacha; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "%s(%s,%s)", name, chacha->cra_driver_name, + "%s(%s,%s)", name, chacha->base.cra_driver_name, poly->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto out_drop_chacha; - inst->alg.base.cra_flags = (chacha->cra_flags | poly->cra_flags) & + inst->alg.base.cra_flags = (chacha->base.cra_flags | poly->cra_flags) & CRYPTO_ALG_ASYNC; - inst->alg.base.cra_priority = (chacha->cra_priority + + inst->alg.base.cra_priority = (chacha->base.cra_priority + poly->cra_priority) / 2; inst->alg.base.cra_blocksize = 1; - inst->alg.base.cra_alignmask = chacha->cra_alignmask | + inst->alg.base.cra_alignmask = chacha->base.cra_alignmask | poly->cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct chachapoly_ctx) + ctx->saltlen; inst->alg.ivsize = ivsize; + inst->alg.chunksize = crypto_skcipher_alg_chunksize(chacha); inst->alg.maxauthsize = POLY1305_DIGEST_SIZE; inst->alg.init = chachapoly_init; inst->alg.exit = chachapoly_exit; diff --git a/crypto/chainiv.c b/crypto/chainiv.c deleted file mode 100644 index b4340018c8d4..000000000000 --- a/crypto/chainiv.c +++ /dev/null @@ -1,317 +0,0 @@ -/* - * chainiv: Chain IV Generator - * - * Generate IVs simply be using the last block of the previous encryption. - * This is mainly useful for CBC with a synchronous algorithm. - * - * Copyright (c) 2007 Herbert Xu - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -enum { - CHAINIV_STATE_INUSE = 0, -}; - -struct chainiv_ctx { - spinlock_t lock; - char iv[]; -}; - -struct async_chainiv_ctx { - unsigned long state; - - spinlock_t lock; - int err; - - struct crypto_queue queue; - struct work_struct postponed; - - char iv[]; -}; - -static int chainiv_givencrypt(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct chainiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req); - unsigned int ivsize; - int err; - - ablkcipher_request_set_tfm(subreq, skcipher_geniv_cipher(geniv)); - ablkcipher_request_set_callback(subreq, req->creq.base.flags & - ~CRYPTO_TFM_REQ_MAY_SLEEP, - req->creq.base.complete, - req->creq.base.data); - ablkcipher_request_set_crypt(subreq, req->creq.src, req->creq.dst, - req->creq.nbytes, req->creq.info); - - spin_lock_bh(&ctx->lock); - - ivsize = crypto_ablkcipher_ivsize(geniv); - - memcpy(req->giv, ctx->iv, ivsize); - memcpy(subreq->info, ctx->iv, ivsize); - - err = crypto_ablkcipher_encrypt(subreq); - if (err) - goto unlock; - - memcpy(ctx->iv, subreq->info, ivsize); - -unlock: - spin_unlock_bh(&ctx->lock); - - return err; -} - -static int chainiv_init_common(struct crypto_tfm *tfm, char iv[]) -{ - struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm); - int err = 0; - - tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request); - - if (iv) { - err = crypto_rng_get_bytes(crypto_default_rng, iv, - crypto_ablkcipher_ivsize(geniv)); - crypto_put_default_rng(); - } - - return err ?: skcipher_geniv_init(tfm); -} - -static int chainiv_init(struct crypto_tfm *tfm) -{ - struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm); - struct chainiv_ctx *ctx = crypto_tfm_ctx(tfm); - char *iv; - - spin_lock_init(&ctx->lock); - - iv = NULL; - if (!crypto_get_default_rng()) { - crypto_ablkcipher_crt(geniv)->givencrypt = chainiv_givencrypt; - iv = ctx->iv; - } - - return chainiv_init_common(tfm, iv); -} - -static int async_chainiv_schedule_work(struct async_chainiv_ctx *ctx) -{ - int queued; - int err = ctx->err; - - if (!ctx->queue.qlen) { - smp_mb__before_atomic(); - clear_bit(CHAINIV_STATE_INUSE, &ctx->state); - - if (!ctx->queue.qlen || - test_and_set_bit(CHAINIV_STATE_INUSE, &ctx->state)) - goto out; - } - - queued = queue_work(kcrypto_wq, &ctx->postponed); - BUG_ON(!queued); - -out: - return err; -} - -static int async_chainiv_postpone_request(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct async_chainiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - int err; - - spin_lock_bh(&ctx->lock); - err = skcipher_enqueue_givcrypt(&ctx->queue, req); - spin_unlock_bh(&ctx->lock); - - if (test_and_set_bit(CHAINIV_STATE_INUSE, &ctx->state)) - return err; - - ctx->err = err; - return async_chainiv_schedule_work(ctx); -} - -static int async_chainiv_givencrypt_tail(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct async_chainiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req); - unsigned int ivsize = crypto_ablkcipher_ivsize(geniv); - - memcpy(req->giv, ctx->iv, ivsize); - memcpy(subreq->info, ctx->iv, ivsize); - - ctx->err = crypto_ablkcipher_encrypt(subreq); - if (ctx->err) - goto out; - - memcpy(ctx->iv, 
subreq->info, ivsize); - -out: - return async_chainiv_schedule_work(ctx); -} - -static int async_chainiv_givencrypt(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct async_chainiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req); - - ablkcipher_request_set_tfm(subreq, skcipher_geniv_cipher(geniv)); - ablkcipher_request_set_callback(subreq, req->creq.base.flags, - req->creq.base.complete, - req->creq.base.data); - ablkcipher_request_set_crypt(subreq, req->creq.src, req->creq.dst, - req->creq.nbytes, req->creq.info); - - if (test_and_set_bit(CHAINIV_STATE_INUSE, &ctx->state)) - goto postpone; - - if (ctx->queue.qlen) { - clear_bit(CHAINIV_STATE_INUSE, &ctx->state); - goto postpone; - } - - return async_chainiv_givencrypt_tail(req); - -postpone: - return async_chainiv_postpone_request(req); -} - -static void async_chainiv_do_postponed(struct work_struct *work) -{ - struct async_chainiv_ctx *ctx = container_of(work, - struct async_chainiv_ctx, - postponed); - struct skcipher_givcrypt_request *req; - struct ablkcipher_request *subreq; - int err; - - /* Only handle one request at a time to avoid hogging keventd. */ - spin_lock_bh(&ctx->lock); - req = skcipher_dequeue_givcrypt(&ctx->queue); - spin_unlock_bh(&ctx->lock); - - if (!req) { - async_chainiv_schedule_work(ctx); - return; - } - - subreq = skcipher_givcrypt_reqctx(req); - subreq->base.flags |= CRYPTO_TFM_REQ_MAY_SLEEP; - - err = async_chainiv_givencrypt_tail(req); - - local_bh_disable(); - skcipher_givcrypt_complete(req, err); - local_bh_enable(); -} - -static int async_chainiv_init(struct crypto_tfm *tfm) -{ - struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm); - struct async_chainiv_ctx *ctx = crypto_tfm_ctx(tfm); - char *iv; - - spin_lock_init(&ctx->lock); - - crypto_init_queue(&ctx->queue, 100); - INIT_WORK(&ctx->postponed, async_chainiv_do_postponed); - - iv = NULL; - if (!crypto_get_default_rng()) { - crypto_ablkcipher_crt(geniv)->givencrypt = - async_chainiv_givencrypt; - iv = ctx->iv; - } - - return chainiv_init_common(tfm, iv); -} - -static void async_chainiv_exit(struct crypto_tfm *tfm) -{ - struct async_chainiv_ctx *ctx = crypto_tfm_ctx(tfm); - - BUG_ON(test_bit(CHAINIV_STATE_INUSE, &ctx->state) || ctx->queue.qlen); - - skcipher_geniv_exit(tfm); -} - -static struct crypto_template chainiv_tmpl; - -static struct crypto_instance *chainiv_alloc(struct rtattr **tb) -{ - struct crypto_attr_type *algt; - struct crypto_instance *inst; - - algt = crypto_get_attr_type(tb); - if (IS_ERR(algt)) - return ERR_CAST(algt); - - inst = skcipher_geniv_alloc(&chainiv_tmpl, tb, 0, 0); - if (IS_ERR(inst)) - goto out; - - inst->alg.cra_init = chainiv_init; - inst->alg.cra_exit = skcipher_geniv_exit; - - inst->alg.cra_ctxsize = sizeof(struct chainiv_ctx); - - if (!crypto_requires_sync(algt->type, algt->mask)) { - inst->alg.cra_flags |= CRYPTO_ALG_ASYNC; - - inst->alg.cra_init = async_chainiv_init; - inst->alg.cra_exit = async_chainiv_exit; - - inst->alg.cra_ctxsize = sizeof(struct async_chainiv_ctx); - } - - inst->alg.cra_ctxsize += inst->alg.cra_ablkcipher.ivsize; - -out: - return inst; -} - -static struct crypto_template chainiv_tmpl = { - .name = "chainiv", - .alloc = chainiv_alloc, - .free = skcipher_geniv_free, - .module = THIS_MODULE, -}; - -static int __init chainiv_module_init(void) -{ - return crypto_register_template(&chainiv_tmpl); -} - -static void chainiv_module_exit(void) -{ - 
crypto_unregister_template(&chainiv_tmpl); -} - -module_init(chainiv_module_init); -module_exit(chainiv_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Chain IV Generator"); -MODULE_ALIAS_CRYPTO("chainiv"); diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 7921251cdb13..cf8037a87b2d 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -31,7 +32,7 @@ #include #include -#define CRYPTD_MAX_CPU_QLEN 100 +#define CRYPTD_MAX_CPU_QLEN 1000 struct cryptd_cpu_queue { struct crypto_queue queue; @@ -58,6 +59,7 @@ struct aead_instance_ctx { }; struct cryptd_blkcipher_ctx { + atomic_t refcnt; struct crypto_blkcipher *child; }; @@ -66,6 +68,7 @@ struct cryptd_blkcipher_request_ctx { }; struct cryptd_hash_ctx { + atomic_t refcnt; struct crypto_shash *child; }; @@ -75,6 +78,7 @@ struct cryptd_hash_request_ctx { }; struct cryptd_aead_ctx { + atomic_t refcnt; struct crypto_aead *child; }; @@ -118,11 +122,29 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue, { int cpu, err; struct cryptd_cpu_queue *cpu_queue; + struct crypto_tfm *tfm; + atomic_t *refcnt; + bool may_backlog; cpu = get_cpu(); cpu_queue = this_cpu_ptr(queue->cpu_queue); err = crypto_enqueue_request(&cpu_queue->queue, request); + + refcnt = crypto_tfm_ctx(request->tfm); + may_backlog = request->flags & CRYPTO_TFM_REQ_MAY_BACKLOG; + + if (err == -EBUSY && !may_backlog) + goto out_put_cpu; + queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); + + if (!atomic_read(refcnt)) + goto out_put_cpu; + + tfm = request->tfm; + atomic_inc(refcnt); + +out_put_cpu: put_cpu(); return err; @@ -206,7 +228,10 @@ static void cryptd_blkcipher_crypt(struct ablkcipher_request *req, unsigned int len)) { struct cryptd_blkcipher_request_ctx *rctx; + struct cryptd_blkcipher_ctx *ctx; + struct crypto_ablkcipher *tfm; struct blkcipher_desc desc; + int refcnt; rctx = ablkcipher_request_ctx(req); @@ -222,9 +247,16 @@ static void cryptd_blkcipher_crypt(struct ablkcipher_request *req, req->base.complete = rctx->complete; out: + tfm = crypto_ablkcipher_reqtfm(req); + ctx = crypto_ablkcipher_ctx(tfm); + refcnt = atomic_read(&ctx->refcnt); + local_bh_disable(); rctx->complete(&req->base, err); local_bh_enable(); + + if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt)) + crypto_free_ablkcipher(tfm); } static void cryptd_blkcipher_encrypt(struct crypto_async_request *req, int err) @@ -456,6 +488,21 @@ static int cryptd_hash_enqueue(struct ahash_request *req, return cryptd_enqueue_request(queue, &req->base); } +static void cryptd_hash_complete(struct ahash_request *req, int err) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + int refcnt = atomic_read(&ctx->refcnt); + + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); + + if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt)) + crypto_free_ahash(tfm); +} + static void cryptd_hash_init(struct crypto_async_request *req_async, int err) { struct cryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); @@ -475,9 +522,7 @@ static void cryptd_hash_init(struct crypto_async_request *req_async, int err) req->base.complete = rctx->complete; out: - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); + cryptd_hash_complete(req, err); } static int cryptd_hash_init_enqueue(struct ahash_request *req) @@ -500,9 +545,7 @@ static 
void cryptd_hash_update(struct crypto_async_request *req_async, int err) req->base.complete = rctx->complete; out: - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); + cryptd_hash_complete(req, err); } static int cryptd_hash_update_enqueue(struct ahash_request *req) @@ -523,9 +566,7 @@ static void cryptd_hash_final(struct crypto_async_request *req_async, int err) req->base.complete = rctx->complete; out: - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); + cryptd_hash_complete(req, err); } static int cryptd_hash_final_enqueue(struct ahash_request *req) @@ -546,9 +587,7 @@ static void cryptd_hash_finup(struct crypto_async_request *req_async, int err) req->base.complete = rctx->complete; out: - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); + cryptd_hash_complete(req, err); } static int cryptd_hash_finup_enqueue(struct ahash_request *req) @@ -575,9 +614,7 @@ static void cryptd_hash_digest(struct crypto_async_request *req_async, int err) req->base.complete = rctx->complete; out: - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); + cryptd_hash_complete(req, err); } static int cryptd_hash_digest_enqueue(struct ahash_request *req) @@ -688,7 +725,10 @@ static void cryptd_aead_crypt(struct aead_request *req, int (*crypt)(struct aead_request *req)) { struct cryptd_aead_request_ctx *rctx; + struct cryptd_aead_ctx *ctx; crypto_completion_t compl; + struct crypto_aead *tfm; + int refcnt; rctx = aead_request_ctx(req); compl = rctx->complete; @@ -697,10 +737,18 @@ static void cryptd_aead_crypt(struct aead_request *req, goto out; aead_request_set_tfm(req, child); err = crypt( req ); + out: + tfm = crypto_aead_reqtfm(req); + ctx = crypto_aead_ctx(tfm); + refcnt = atomic_read(&ctx->refcnt); + local_bh_disable(); compl(&req->base, err); local_bh_enable(); + + if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt)) + crypto_free_aead(tfm); } static void cryptd_aead_encrypt(struct crypto_async_request *areq, int err) @@ -883,6 +931,7 @@ struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name, u32 type, u32 mask) { char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; + struct cryptd_blkcipher_ctx *ctx; struct crypto_tfm *tfm; if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, @@ -899,6 +948,9 @@ struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name, return ERR_PTR(-EINVAL); } + ctx = crypto_tfm_ctx(tfm); + atomic_set(&ctx->refcnt, 1); + return __cryptd_ablkcipher_cast(__crypto_ablkcipher_cast(tfm)); } EXPORT_SYMBOL_GPL(cryptd_alloc_ablkcipher); @@ -910,9 +962,20 @@ struct crypto_blkcipher *cryptd_ablkcipher_child(struct cryptd_ablkcipher *tfm) } EXPORT_SYMBOL_GPL(cryptd_ablkcipher_child); +bool cryptd_ablkcipher_queued(struct cryptd_ablkcipher *tfm) +{ + struct cryptd_blkcipher_ctx *ctx = crypto_ablkcipher_ctx(&tfm->base); + + return atomic_read(&ctx->refcnt) - 1; +} +EXPORT_SYMBOL_GPL(cryptd_ablkcipher_queued); + void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm) { - crypto_free_ablkcipher(&tfm->base); + struct cryptd_blkcipher_ctx *ctx = crypto_ablkcipher_ctx(&tfm->base); + + if (atomic_dec_and_test(&ctx->refcnt)) + crypto_free_ablkcipher(&tfm->base); } EXPORT_SYMBOL_GPL(cryptd_free_ablkcipher); @@ -920,6 +983,7 @@ struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, u32 type, u32 mask) { char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; + struct cryptd_hash_ctx *ctx; struct crypto_ahash *tfm; if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, @@ -933,6 
+997,9 @@ struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, return ERR_PTR(-EINVAL); } + ctx = crypto_ahash_ctx(tfm); + atomic_set(&ctx->refcnt, 1); + return __cryptd_ahash_cast(tfm); } EXPORT_SYMBOL_GPL(cryptd_alloc_ahash); @@ -952,9 +1019,20 @@ struct shash_desc *cryptd_shash_desc(struct ahash_request *req) } EXPORT_SYMBOL_GPL(cryptd_shash_desc); +bool cryptd_ahash_queued(struct cryptd_ahash *tfm) +{ + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); + + return atomic_read(&ctx->refcnt) - 1; +} +EXPORT_SYMBOL_GPL(cryptd_ahash_queued); + void cryptd_free_ahash(struct cryptd_ahash *tfm) { - crypto_free_ahash(&tfm->base); + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); + + if (atomic_dec_and_test(&ctx->refcnt)) + crypto_free_ahash(&tfm->base); } EXPORT_SYMBOL_GPL(cryptd_free_ahash); @@ -962,6 +1040,7 @@ struct cryptd_aead *cryptd_alloc_aead(const char *alg_name, u32 type, u32 mask) { char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; + struct cryptd_aead_ctx *ctx; struct crypto_aead *tfm; if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, @@ -974,6 +1053,10 @@ struct cryptd_aead *cryptd_alloc_aead(const char *alg_name, crypto_free_aead(tfm); return ERR_PTR(-EINVAL); } + + ctx = crypto_aead_ctx(tfm); + atomic_set(&ctx->refcnt, 1); + return __cryptd_aead_cast(tfm); } EXPORT_SYMBOL_GPL(cryptd_alloc_aead); @@ -986,9 +1069,20 @@ struct crypto_aead *cryptd_aead_child(struct cryptd_aead *tfm) } EXPORT_SYMBOL_GPL(cryptd_aead_child); +bool cryptd_aead_queued(struct cryptd_aead *tfm) +{ + struct cryptd_aead_ctx *ctx = crypto_aead_ctx(&tfm->base); + + return atomic_read(&ctx->refcnt) - 1; +} +EXPORT_SYMBOL_GPL(cryptd_aead_queued); + void cryptd_free_aead(struct cryptd_aead *tfm) { - crypto_free_aead(&tfm->base); + struct cryptd_aead_ctx *ctx = crypto_aead_ctx(&tfm->base); + + if (atomic_dec_and_test(&ctx->refcnt)) + crypto_free_aead(&tfm->base); } EXPORT_SYMBOL_GPL(cryptd_free_aead); diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index 941c9a434d50..20ff2c746e0b 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -26,7 +26,7 @@ #include static DEFINE_MUTEX(crypto_default_null_skcipher_lock); -static struct crypto_blkcipher *crypto_default_null_skcipher; +static struct crypto_skcipher *crypto_default_null_skcipher; static int crypto_default_null_skcipher_refcnt; static int null_compress(struct crypto_tfm *tfm, const u8 *src, @@ -153,15 +153,16 @@ MODULE_ALIAS_CRYPTO("compress_null"); MODULE_ALIAS_CRYPTO("digest_null"); MODULE_ALIAS_CRYPTO("cipher_null"); -struct crypto_blkcipher *crypto_get_default_null_skcipher(void) +struct crypto_skcipher *crypto_get_default_null_skcipher(void) { - struct crypto_blkcipher *tfm; + struct crypto_skcipher *tfm; mutex_lock(&crypto_default_null_skcipher_lock); tfm = crypto_default_null_skcipher; if (!tfm) { - tfm = crypto_alloc_blkcipher("ecb(cipher_null)", 0, 0); + tfm = crypto_alloc_skcipher("ecb(cipher_null)", + 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto unlock; @@ -181,7 +182,7 @@ void crypto_put_default_null_skcipher(void) { mutex_lock(&crypto_default_null_skcipher_lock); if (!--crypto_default_null_skcipher_refcnt) { - crypto_free_blkcipher(crypto_default_null_skcipher); + crypto_free_skcipher(crypto_default_null_skcipher); crypto_default_null_skcipher = NULL; } mutex_unlock(&crypto_default_null_skcipher_lock); diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 43fe85f20d57..1c5705481c69 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -28,6 +28,7 @@ #include #include #include +#include 
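/*
 * The refcounting added to cryptd above means each queued request pins the
 * tfm, cryptd_free_*() merely drops the caller's reference, and the new
 * cryptd_*_queued() helpers report whether requests are still outstanding
 * (refcnt - 1). An illustrative sketch (not part of the patch; the function
 * name is hypothetical) using the ahash variant:
 */
#include <crypto/cryptd.h>
#include <linux/bug.h>
#include <linux/err.h>

static void cryptd_refcnt_sketch(void)
{
	struct cryptd_ahash *tfm;

	tfm = cryptd_alloc_ahash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return;

	/* Nothing has been submitted yet, so no requests can be queued. */
	WARN_ON(cryptd_ahash_queued(tfm));

	/*
	 * Drops this caller's reference. Had requests still been queued,
	 * the underlying tfm would stay alive until the last one completes.
	 */
	cryptd_free_ahash(tfm);
}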
#include "internal.h" @@ -126,6 +127,21 @@ nla_put_failure: return -EMSGSIZE; } +static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_kpp rkpp; + + strncpy(rkpp.type, "kpp", sizeof(rkpp.type)); + + if (nla_put(skb, CRYPTOCFGA_REPORT_KPP, + sizeof(struct crypto_report_kpp), &rkpp)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int crypto_report_one(struct crypto_alg *alg, struct crypto_user_alg *ualg, struct sk_buff *skb) { @@ -176,6 +192,10 @@ static int crypto_report_one(struct crypto_alg *alg, goto nla_put_failure; break; + case CRYPTO_ALG_TYPE_KPP: + if (crypto_report_kpp(skb, alg)) + goto nla_put_failure; + break; } out: @@ -358,32 +378,6 @@ drop_alg: return err; } -static struct crypto_alg *crypto_user_skcipher_alg(const char *name, u32 type, - u32 mask) -{ - int err; - struct crypto_alg *alg; - - type = crypto_skcipher_type(type); - mask = crypto_skcipher_mask(mask); - - for (;;) { - alg = crypto_lookup_skcipher(name, type, mask); - if (!IS_ERR(alg)) - return alg; - - err = PTR_ERR(alg); - if (err != -EAGAIN) - break; - if (fatal_signal_pending(current)) { - err = -EINTR; - break; - } - } - - return ERR_PTR(err); -} - static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { @@ -416,16 +410,7 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh, else name = p->cru_name; - switch (p->cru_type & p->cru_mask & CRYPTO_ALG_TYPE_MASK) { - case CRYPTO_ALG_TYPE_GIVCIPHER: - case CRYPTO_ALG_TYPE_BLKCIPHER: - case CRYPTO_ALG_TYPE_ABLKCIPHER: - alg = crypto_user_skcipher_alg(name, p->cru_type, p->cru_mask); - break; - default: - alg = crypto_alg_mod_lookup(name, p->cru_type, p->cru_mask); - } - + alg = crypto_alg_mod_lookup(name, p->cru_type, p->cru_mask); if (IS_ERR(alg)) return PTR_ERR(alg); @@ -455,6 +440,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = { [CRYPTO_MSG_NEWALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), + [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = 0, }; diff --git a/crypto/ctr.c b/crypto/ctr.c index 2386f7313952..ff4d21eddb83 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -26,13 +26,13 @@ struct crypto_ctr_ctx { }; struct crypto_rfc3686_ctx { - struct crypto_ablkcipher *child; + struct crypto_skcipher *child; u8 nonce[CTR_RFC3686_NONCE_SIZE]; }; struct crypto_rfc3686_req_ctx { u8 iv[CTR_RFC3686_BLOCK_SIZE]; - struct ablkcipher_request subreq CRYPTO_MINALIGN_ATTR; + struct skcipher_request subreq CRYPTO_MINALIGN_ATTR; }; static int crypto_ctr_setkey(struct crypto_tfm *parent, const u8 *key, @@ -249,11 +249,11 @@ static struct crypto_template crypto_ctr_tmpl = { .module = THIS_MODULE, }; -static int crypto_rfc3686_setkey(struct crypto_ablkcipher *parent, +static int crypto_rfc3686_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { - struct crypto_rfc3686_ctx *ctx = crypto_ablkcipher_ctx(parent); - struct crypto_ablkcipher *child = ctx->child; + struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(parent); + struct crypto_skcipher *child = ctx->child; int err; /* the nonce is stored in bytes at end of key */ @@ -265,173 +265,178 @@ static int crypto_rfc3686_setkey(struct crypto_ablkcipher *parent, keylen -= CTR_RFC3686_NONCE_SIZE; - crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - 
crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(child, key, keylen); - crypto_ablkcipher_set_flags(parent, crypto_ablkcipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, keylen); + crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); return err; } -static int crypto_rfc3686_crypt(struct ablkcipher_request *req) +static int crypto_rfc3686_crypt(struct skcipher_request *req) { - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct crypto_rfc3686_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_ablkcipher *child = ctx->child; - unsigned long align = crypto_ablkcipher_alignmask(tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child = ctx->child; + unsigned long align = crypto_skcipher_alignmask(tfm); struct crypto_rfc3686_req_ctx *rctx = - (void *)PTR_ALIGN((u8 *)ablkcipher_request_ctx(req), align + 1); - struct ablkcipher_request *subreq = &rctx->subreq; + (void *)PTR_ALIGN((u8 *)skcipher_request_ctx(req), align + 1); + struct skcipher_request *subreq = &rctx->subreq; u8 *iv = rctx->iv; /* set up counter block */ memcpy(iv, ctx->nonce, CTR_RFC3686_NONCE_SIZE); - memcpy(iv + CTR_RFC3686_NONCE_SIZE, req->info, CTR_RFC3686_IV_SIZE); + memcpy(iv + CTR_RFC3686_NONCE_SIZE, req->iv, CTR_RFC3686_IV_SIZE); /* initialize counter portion of counter block */ *(__be32 *)(iv + CTR_RFC3686_NONCE_SIZE + CTR_RFC3686_IV_SIZE) = cpu_to_be32(1); - ablkcipher_request_set_tfm(subreq, child); - ablkcipher_request_set_callback(subreq, req->base.flags, - req->base.complete, req->base.data); - ablkcipher_request_set_crypt(subreq, req->src, req->dst, req->nbytes, - iv); + skcipher_request_set_tfm(subreq, child); + skcipher_request_set_callback(subreq, req->base.flags, + req->base.complete, req->base.data); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen, iv); - return crypto_ablkcipher_encrypt(subreq); + return crypto_skcipher_encrypt(subreq); } -static int crypto_rfc3686_init_tfm(struct crypto_tfm *tfm) +static int crypto_rfc3686_init_tfm(struct crypto_skcipher *tfm) { - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_skcipher_spawn *spawn = crypto_instance_ctx(inst); - struct crypto_rfc3686_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_ablkcipher *cipher; + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); + struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *cipher; unsigned long align; + unsigned int reqsize; - cipher = crypto_spawn_skcipher(spawn); + cipher = crypto_spawn_skcipher2(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; - align = crypto_tfm_alg_alignmask(tfm); + align = crypto_skcipher_alignmask(tfm); align &= ~(crypto_tfm_ctx_alignment() - 1); - tfm->crt_ablkcipher.reqsize = align + - sizeof(struct crypto_rfc3686_req_ctx) + - crypto_ablkcipher_reqsize(cipher); + reqsize = align + sizeof(struct crypto_rfc3686_req_ctx) + + crypto_skcipher_reqsize(cipher); + crypto_skcipher_set_reqsize(tfm, reqsize); return 0; } -static void crypto_rfc3686_exit_tfm(struct crypto_tfm *tfm) +static void 
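/*
 * The counter block that crypto_rfc3686_crypt() assembles above, as a
 * standalone sketch (illustrative, not part of the patch; the function name
 * is hypothetical): a 4-byte nonce taken from the tail of the key, the
 * caller's 8-byte IV, and a big-endian 32-bit block counter that starts at
 * 1 per RFC 3686. The CTR_RFC3686_* sizes come from <crypto/ctr.h>.
 */
#include <crypto/ctr.h>
#include <linux/string.h>
#include <linux/types.h>

static void rfc3686_build_ctrblk(u8 ctrblk[CTR_RFC3686_BLOCK_SIZE],
				 const u8 *nonce, const u8 *iv)
{
	memcpy(ctrblk, nonce, CTR_RFC3686_NONCE_SIZE);
	memcpy(ctrblk + CTR_RFC3686_NONCE_SIZE, iv, CTR_RFC3686_IV_SIZE);
	/* Counter portion is big-endian and starts at 1. */
	*(__be32 *)(ctrblk + CTR_RFC3686_NONCE_SIZE + CTR_RFC3686_IV_SIZE) =
		cpu_to_be32(1);
}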
crypto_rfc3686_exit_tfm(struct crypto_skcipher *tfm) { - struct crypto_rfc3686_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->child); +} - crypto_free_ablkcipher(ctx->child); +static void crypto_rfc3686_free(struct skcipher_instance *inst) +{ + struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); + + crypto_drop_skcipher(spawn); + kfree(inst); } -static struct crypto_instance *crypto_rfc3686_alloc(struct rtattr **tb) +static int crypto_rfc3686_create(struct crypto_template *tmpl, + struct rtattr **tb) { struct crypto_attr_type *algt; - struct crypto_instance *inst; - struct crypto_alg *alg; + struct skcipher_instance *inst; + struct skcipher_alg *alg; struct crypto_skcipher_spawn *spawn; const char *cipher_name; int err; algt = crypto_get_attr_type(tb); if (IS_ERR(algt)) - return ERR_CAST(algt); + return PTR_ERR(algt); - if ((algt->type ^ CRYPTO_ALG_TYPE_BLKCIPHER) & algt->mask) - return ERR_PTR(-EINVAL); + if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) + return -EINVAL; cipher_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(cipher_name)) - return ERR_CAST(cipher_name); + return PTR_ERR(cipher_name); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) - return ERR_PTR(-ENOMEM); + return -ENOMEM; - spawn = crypto_instance_ctx(inst); + spawn = skcipher_instance_ctx(inst); - crypto_set_skcipher_spawn(spawn, inst); - err = crypto_grab_skcipher(spawn, cipher_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher2(spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_free_inst; - alg = crypto_skcipher_spawn_alg(spawn); + alg = crypto_spawn_skcipher_alg(spawn); /* We only support 16-byte blocks. */ err = -EINVAL; - if (alg->cra_ablkcipher.ivsize != CTR_RFC3686_BLOCK_SIZE) + if (crypto_skcipher_alg_ivsize(alg) != CTR_RFC3686_BLOCK_SIZE) goto err_drop_spawn; /* Not a stream cipher? 
*/ - if (alg->cra_blocksize != 1) + if (alg->base.cra_blocksize != 1) goto err_drop_spawn; err = -ENAMETOOLONG; - if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, "rfc3686(%s)", - alg->cra_name) >= CRYPTO_MAX_ALG_NAME) + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "rfc3686(%s)", alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_spawn; - if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "rfc3686(%s)", alg->cra_driver_name) >= - CRYPTO_MAX_ALG_NAME) + if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "rfc3686(%s)", alg->base.cra_driver_name) >= + CRYPTO_MAX_ALG_NAME) goto err_drop_spawn; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = 1; - inst->alg.cra_alignmask = alg->cra_alignmask; + inst->alg.base.cra_priority = alg->base.cra_priority; + inst->alg.base.cra_blocksize = 1; + inst->alg.base.cra_alignmask = alg->base.cra_alignmask; - inst->alg.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | - (alg->cra_flags & CRYPTO_ALG_ASYNC); - inst->alg.cra_type = &crypto_ablkcipher_type; + inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; - inst->alg.cra_ablkcipher.ivsize = CTR_RFC3686_IV_SIZE; - inst->alg.cra_ablkcipher.min_keysize = - alg->cra_ablkcipher.min_keysize + CTR_RFC3686_NONCE_SIZE; - inst->alg.cra_ablkcipher.max_keysize = - alg->cra_ablkcipher.max_keysize + CTR_RFC3686_NONCE_SIZE; + inst->alg.ivsize = CTR_RFC3686_IV_SIZE; + inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg); + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) + + CTR_RFC3686_NONCE_SIZE; + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) + + CTR_RFC3686_NONCE_SIZE; - inst->alg.cra_ablkcipher.geniv = "seqiv"; + inst->alg.setkey = crypto_rfc3686_setkey; + inst->alg.encrypt = crypto_rfc3686_crypt; + inst->alg.decrypt = crypto_rfc3686_crypt; - inst->alg.cra_ablkcipher.setkey = crypto_rfc3686_setkey; - inst->alg.cra_ablkcipher.encrypt = crypto_rfc3686_crypt; - inst->alg.cra_ablkcipher.decrypt = crypto_rfc3686_crypt; + inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc3686_ctx); - inst->alg.cra_ctxsize = sizeof(struct crypto_rfc3686_ctx); + inst->alg.init = crypto_rfc3686_init_tfm; + inst->alg.exit = crypto_rfc3686_exit_tfm; - inst->alg.cra_init = crypto_rfc3686_init_tfm; - inst->alg.cra_exit = crypto_rfc3686_exit_tfm; + inst->free = crypto_rfc3686_free; - return inst; + err = skcipher_register_instance(tmpl, inst); + if (err) + goto err_drop_spawn; + +out: + return err; err_drop_spawn: crypto_drop_skcipher(spawn); err_free_inst: kfree(inst); - return ERR_PTR(err); -} - -static void crypto_rfc3686_free(struct crypto_instance *inst) -{ - struct crypto_skcipher_spawn *spawn = crypto_instance_ctx(inst); - - crypto_drop_skcipher(spawn); - kfree(inst); + goto out; } static struct crypto_template crypto_rfc3686_tmpl = { .name = "rfc3686", - .alloc = crypto_rfc3686_alloc, - .free = crypto_rfc3686_free, + .create = crypto_rfc3686_create, .module = THIS_MODULE, }; diff --git a/crypto/cts.c b/crypto/cts.c index e467ec0acf9f..51976187b2bf 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -40,7 +40,7 @@ * rfc3962 includes errata information in its Appendix A. 
*/ -#include <crypto/algapi.h> +#include <crypto/internal/skcipher.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> @@ -51,289 +51,364 @@ #include <linux/slab.h> struct crypto_cts_ctx { - struct crypto_blkcipher *child; + struct crypto_skcipher *child; }; -static int crypto_cts_setkey(struct crypto_tfm *parent, const u8 *key, - unsigned int keylen) +struct crypto_cts_reqctx { + struct scatterlist sg[2]; + unsigned offset; + struct skcipher_request subreq; +}; + +static inline u8 *crypto_cts_reqctx_space(struct skcipher_request *req) { - struct crypto_cts_ctx *ctx = crypto_tfm_ctx(parent); - struct crypto_blkcipher *child = ctx->child; - int err; + struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child = ctx->child; - crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_blkcipher_setkey(child, key, keylen); - crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); - return err; + return PTR_ALIGN((u8 *)(rctx + 1) + crypto_skcipher_reqsize(child), + crypto_skcipher_alignmask(tfm) + 1); } -static int cts_cbc_encrypt(struct crypto_cts_ctx *ctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int offset, - unsigned int nbytes) +static int crypto_cts_setkey(struct crypto_skcipher *parent, const u8 *key, + unsigned int keylen) { - int bsize = crypto_blkcipher_blocksize(desc->tfm); - u8 tmp[bsize], tmp2[bsize]; - struct blkcipher_desc lcldesc; - struct scatterlist sgsrc[1], sgdst[1]; - int lastn = nbytes - bsize; - u8 iv[bsize]; - u8 s[bsize * 2], d[bsize * 2]; + struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(parent); + struct crypto_skcipher *child = ctx->child; int err; - if (lastn < 0) - return -EINVAL; + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, keylen); + crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); + return err; +} - sg_init_table(sgsrc, 1); - sg_init_table(sgdst, 1); +static void cts_cbc_crypt_done(struct crypto_async_request *areq, int err) +{ + struct skcipher_request *req = areq->data; - memset(s, 0, sizeof(s)); - scatterwalk_map_and_copy(s, src, offset, nbytes, 0); + if (err == -EINPROGRESS) + return; - memcpy(iv, desc->info, bsize); + skcipher_request_complete(req, err); +} - lcldesc.tfm = ctx->child; - lcldesc.info = iv; - lcldesc.flags = desc->flags; +static int cts_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct skcipher_request *subreq = &rctx->subreq; + int bsize = crypto_skcipher_blocksize(tfm); + u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + struct scatterlist *sg; + unsigned int offset; + int lastn; + + offset = rctx->offset; + lastn = req->cryptlen - offset; + + sg = scatterwalk_ffwd(rctx->sg, req->dst, offset - bsize); + scatterwalk_map_and_copy(d + bsize, sg, 0, bsize, 0); + + memset(d, 0, bsize); + scatterwalk_map_and_copy(d, req->src, offset, lastn, 0); + + scatterwalk_map_and_copy(d, sg, 0, bsize + lastn, 1); + memzero_explicit(d, sizeof(d)); + + skcipher_request_set_callback(subreq, req->base.flags & + CRYPTO_TFM_REQ_MAY_BACKLOG, + cts_cbc_crypt_done, req); + 
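+	/*
+	 * dst now ends with the zero-padded final partial block Pn followed
+	 * by the first lastn bytes of Cn-1, and req->iv still holds Cn-1
+	 * (the chaining value left by the CBC pass over the preceding full
+	 * blocks).  CBC-encrypting the padded block in place below yields
+	 * Cn and completes the last-two-block swap that ciphertext stealing
+	 * requires.
+	 */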
skcipher_request_set_crypt(subreq, sg, sg, bsize, req->iv); + return crypto_skcipher_encrypt(subreq); +} - sg_set_buf(&sgsrc[0], s, bsize); - sg_set_buf(&sgdst[0], tmp, bsize); - err = crypto_blkcipher_encrypt_iv(&lcldesc, sgdst, sgsrc, bsize); +static void crypto_cts_encrypt_done(struct crypto_async_request *areq, int err) +{ + struct skcipher_request *req = areq->data; - memcpy(d + bsize, tmp, lastn); + if (err) + goto out; - lcldesc.info = tmp; + err = cts_cbc_encrypt(req); + if (err == -EINPROGRESS || + (err == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return; - sg_set_buf(&sgsrc[0], s + bsize, bsize); - sg_set_buf(&sgdst[0], tmp2, bsize); - err = crypto_blkcipher_encrypt_iv(&lcldesc, sgdst, sgsrc, bsize); +out: + skcipher_request_complete(req, err); +} - memcpy(d, tmp2, bsize); +static int crypto_cts_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); + struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_request *subreq = &rctx->subreq; + int bsize = crypto_skcipher_blocksize(tfm); + unsigned int nbytes = req->cryptlen; + int cbc_blocks = (nbytes + bsize - 1) / bsize - 1; + unsigned int offset; + + skcipher_request_set_tfm(subreq, ctx->child); + + if (cbc_blocks <= 0) { + skcipher_request_set_callback(subreq, req->base.flags, + req->base.complete, + req->base.data); + skcipher_request_set_crypt(subreq, req->src, req->dst, nbytes, + req->iv); + return crypto_skcipher_encrypt(subreq); + } - scatterwalk_map_and_copy(d, dst, offset, nbytes, 1); + offset = cbc_blocks * bsize; + rctx->offset = offset; - memcpy(desc->info, tmp2, bsize); + skcipher_request_set_callback(subreq, req->base.flags, + crypto_cts_encrypt_done, req); + skcipher_request_set_crypt(subreq, req->src, req->dst, + offset, req->iv); - return err; + return crypto_skcipher_encrypt(subreq) ?: + cts_cbc_encrypt(req); } -static int crypto_cts_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cts_cbc_decrypt(struct skcipher_request *req) { - struct crypto_cts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - int bsize = crypto_blkcipher_blocksize(desc->tfm); - int tot_blocks = (nbytes + bsize - 1) / bsize; - int cbc_blocks = tot_blocks > 2 ? tot_blocks - 2 : 0; - struct blkcipher_desc lcldesc; - int err; + struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct skcipher_request *subreq = &rctx->subreq; + int bsize = crypto_skcipher_blocksize(tfm); + u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + struct scatterlist *sg; + unsigned int offset; + u8 *space; + int lastn; + + offset = rctx->offset; + lastn = req->cryptlen - offset; + + sg = scatterwalk_ffwd(rctx->sg, req->dst, offset - bsize); + + /* 1. Decrypt Cn-1 (s) to create Dn */ + scatterwalk_map_and_copy(d + bsize, sg, 0, bsize, 0); + space = crypto_cts_reqctx_space(req); + crypto_xor(d + bsize, space, bsize); + /* 2. Pad Cn with zeros at the end to create C of length BB */ + memset(d, 0, bsize); + scatterwalk_map_and_copy(d, req->src, offset, lastn, 0); + /* 3. Exclusive-or Dn with C to create Xn */ + /* 4. Select the first Ln bytes of Xn to create Pn */ + crypto_xor(d + bsize, d, lastn); + + /* 5. Append the tail (BB - Ln) bytes of Xn to Cn to create En */ + memcpy(d + lastn, d + bsize + lastn, bsize - lastn); + /* 6. 
Decrypt En to create Pn-1 */ - lcldesc.tfm = ctx->child; - lcldesc.info = desc->info; - lcldesc.flags = desc->flags; - - if (tot_blocks == 1) { - err = crypto_blkcipher_encrypt_iv(&lcldesc, dst, src, bsize); - } else if (nbytes <= bsize * 2) { - err = cts_cbc_encrypt(ctx, desc, dst, src, 0, nbytes); - } else { - /* do normal function for tot_blocks - 2 */ - err = crypto_blkcipher_encrypt_iv(&lcldesc, dst, src, - cbc_blocks * bsize); - if (err == 0) { - /* do cts for final two blocks */ - err = cts_cbc_encrypt(ctx, desc, dst, src, - cbc_blocks * bsize, - nbytes - (cbc_blocks * bsize)); - } - } + scatterwalk_map_and_copy(d, sg, 0, bsize + lastn, 1); + memzero_explicit(d, sizeof(d)); - return err; + skcipher_request_set_callback(subreq, req->base.flags & + CRYPTO_TFM_REQ_MAY_BACKLOG, + cts_cbc_crypt_done, req); + + skcipher_request_set_crypt(subreq, sg, sg, bsize, space); + return crypto_skcipher_decrypt(subreq); } -static int cts_cbc_decrypt(struct crypto_cts_ctx *ctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int offset, - unsigned int nbytes) +static void crypto_cts_decrypt_done(struct crypto_async_request *areq, int err) { - int bsize = crypto_blkcipher_blocksize(desc->tfm); - u8 tmp[bsize]; - struct blkcipher_desc lcldesc; - struct scatterlist sgsrc[1], sgdst[1]; - int lastn = nbytes - bsize; - u8 iv[bsize]; - u8 s[bsize * 2], d[bsize * 2]; - int err; - - if (lastn < 0) - return -EINVAL; + struct skcipher_request *req = areq->data; - sg_init_table(sgsrc, 1); - sg_init_table(sgdst, 1); + if (err) + goto out; - scatterwalk_map_and_copy(s, src, offset, nbytes, 0); + err = cts_cbc_decrypt(req); + if (err == -EINPROGRESS || + (err == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return; - lcldesc.tfm = ctx->child; - lcldesc.info = iv; - lcldesc.flags = desc->flags; +out: + skcipher_request_complete(req, err); +} - /* 1. Decrypt Cn-1 (s) to create Dn (tmp)*/ - memset(iv, 0, sizeof(iv)); - sg_set_buf(&sgsrc[0], s, bsize); - sg_set_buf(&sgdst[0], tmp, bsize); - err = crypto_blkcipher_decrypt_iv(&lcldesc, sgdst, sgsrc, bsize); - if (err) - return err; - /* 2. Pad Cn with zeros at the end to create C of length BB */ - memset(iv, 0, sizeof(iv)); - memcpy(iv, s + bsize, lastn); - /* 3. Exclusive-or Dn (tmp) with C (iv) to create Xn (tmp) */ - crypto_xor(tmp, iv, bsize); - /* 4. Select the first Ln bytes of Xn (tmp) to create Pn */ - memcpy(d + bsize, tmp, lastn); - - /* 5. Append the tail (BB - Ln) bytes of Xn (tmp) to Cn to create En */ - memcpy(s + bsize + lastn, tmp + lastn, bsize - lastn); - /* 6. 
Decrypt En to create Pn-1 */ - memzero_explicit(iv, sizeof(iv)); +static int crypto_cts_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); + struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_request *subreq = &rctx->subreq; + int bsize = crypto_skcipher_blocksize(tfm); + unsigned int nbytes = req->cryptlen; + int cbc_blocks = (nbytes + bsize - 1) / bsize - 1; + unsigned int offset; + u8 *space; + + skcipher_request_set_tfm(subreq, ctx->child); + + if (cbc_blocks <= 0) { + skcipher_request_set_callback(subreq, req->base.flags, + req->base.complete, + req->base.data); + skcipher_request_set_crypt(subreq, req->src, req->dst, nbytes, + req->iv); + return crypto_skcipher_decrypt(subreq); + } - sg_set_buf(&sgsrc[0], s + bsize, bsize); - sg_set_buf(&sgdst[0], d, bsize); - err = crypto_blkcipher_decrypt_iv(&lcldesc, sgdst, sgsrc, bsize); + skcipher_request_set_callback(subreq, req->base.flags, + crypto_cts_decrypt_done, req); - /* XOR with previous block */ - crypto_xor(d, desc->info, bsize); + space = crypto_cts_reqctx_space(req); - scatterwalk_map_and_copy(d, dst, offset, nbytes, 1); + offset = cbc_blocks * bsize; + rctx->offset = offset; - memcpy(desc->info, s, bsize); - return err; -} + if (cbc_blocks <= 1) + memcpy(space, req->iv, bsize); + else + scatterwalk_map_and_copy(space, req->src, offset - 2 * bsize, + bsize, 0); -static int crypto_cts_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct crypto_cts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - int bsize = crypto_blkcipher_blocksize(desc->tfm); - int tot_blocks = (nbytes + bsize - 1) / bsize; - int cbc_blocks = tot_blocks > 2 ? 
tot_blocks - 2 : 0; - struct blkcipher_desc lcldesc; - int err; + skcipher_request_set_crypt(subreq, req->src, req->dst, + offset, req->iv); - lcldesc.tfm = ctx->child; - lcldesc.info = desc->info; - lcldesc.flags = desc->flags; - - if (tot_blocks == 1) { - err = crypto_blkcipher_decrypt_iv(&lcldesc, dst, src, bsize); - } else if (nbytes <= bsize * 2) { - err = cts_cbc_decrypt(ctx, desc, dst, src, 0, nbytes); - } else { - /* do normal function for tot_blocks - 2 */ - err = crypto_blkcipher_decrypt_iv(&lcldesc, dst, src, - cbc_blocks * bsize); - if (err == 0) { - /* do cts for final two blocks */ - err = cts_cbc_decrypt(ctx, desc, dst, src, - cbc_blocks * bsize, - nbytes - (cbc_blocks * bsize)); - } - } - return err; + return crypto_skcipher_decrypt(subreq) ?: + cts_cbc_decrypt(req); } -static int crypto_cts_init_tfm(struct crypto_tfm *tfm) +static int crypto_cts_init_tfm(struct crypto_skcipher *tfm) { - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct crypto_cts_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_blkcipher *cipher; - - cipher = crypto_spawn_blkcipher(spawn); + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); + struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *cipher; + unsigned reqsize; + unsigned bsize; + unsigned align; + + cipher = crypto_spawn_skcipher2(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; + + align = crypto_skcipher_alignmask(tfm); + bsize = crypto_skcipher_blocksize(cipher); + reqsize = ALIGN(sizeof(struct crypto_cts_reqctx) + + crypto_skcipher_reqsize(cipher), + crypto_tfm_ctx_alignment()) + + (align & ~(crypto_tfm_ctx_alignment() - 1)) + bsize; + + crypto_skcipher_set_reqsize(tfm, reqsize); + return 0; } -static void crypto_cts_exit_tfm(struct crypto_tfm *tfm) +static void crypto_cts_exit_tfm(struct crypto_skcipher *tfm) { - struct crypto_cts_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_blkcipher(ctx->child); + struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->child); } -static struct crypto_instance *crypto_cts_alloc(struct rtattr **tb) +static void crypto_cts_free(struct skcipher_instance *inst) { - struct crypto_instance *inst; - struct crypto_alg *alg; + crypto_drop_skcipher(skcipher_instance_ctx(inst)); + kfree(inst); +} + +static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct crypto_skcipher_spawn *spawn; + struct skcipher_instance *inst; + struct crypto_attr_type *algt; + struct skcipher_alg *alg; + const char *cipher_name; int err; - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) + return -EINVAL; + + cipher_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(cipher_name)) + return PTR_ERR(cipher_name); + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + spawn = skcipher_instance_ctx(inst); + + crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher2(spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) - return ERR_PTR(err); + goto err_free_inst; - alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_BLKCIPHER, - CRYPTO_ALG_TYPE_MASK); - if (IS_ERR(alg)) - return ERR_CAST(alg); + alg = 
crypto_spawn_skcipher_alg(spawn); - inst = ERR_PTR(-EINVAL); - if (!is_power_of_2(alg->cra_blocksize)) - goto out_put_alg; + err = -EINVAL; + if (crypto_skcipher_alg_ivsize(alg) != alg->base.cra_blocksize) + goto err_drop_spawn; - if (strncmp(alg->cra_name, "cbc(", 4)) - goto out_put_alg; + if (strncmp(alg->base.cra_name, "cbc(", 4)) + goto err_drop_spawn; - inst = crypto_alloc_instance("cts", alg); - if (IS_ERR(inst)) - goto out_put_alg; + err = crypto_inst_setname(skcipher_crypto_instance(inst), "cts", + &alg->base); + if (err) + goto err_drop_spawn; - inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = &crypto_blkcipher_type; + inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_priority = alg->base.cra_priority; + inst->alg.base.cra_blocksize = alg->base.cra_blocksize; + inst->alg.base.cra_alignmask = alg->base.cra_alignmask; /* We access the data as u32s when xoring. */ - inst->alg.cra_alignmask |= __alignof__(u32) - 1; + inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; - inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize; - inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize; + inst->alg.ivsize = alg->base.cra_blocksize; + inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg); + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg); + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg); - inst->alg.cra_ctxsize = sizeof(struct crypto_cts_ctx); + inst->alg.base.cra_ctxsize = sizeof(struct crypto_cts_ctx); - inst->alg.cra_init = crypto_cts_init_tfm; - inst->alg.cra_exit = crypto_cts_exit_tfm; + inst->alg.init = crypto_cts_init_tfm; + inst->alg.exit = crypto_cts_exit_tfm; - inst->alg.cra_blkcipher.setkey = crypto_cts_setkey; - inst->alg.cra_blkcipher.encrypt = crypto_cts_encrypt; - inst->alg.cra_blkcipher.decrypt = crypto_cts_decrypt; + inst->alg.setkey = crypto_cts_setkey; + inst->alg.encrypt = crypto_cts_encrypt; + inst->alg.decrypt = crypto_cts_decrypt; -out_put_alg: - crypto_mod_put(alg); - return inst; -} + inst->free = crypto_cts_free; -static void crypto_cts_free(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); + err = skcipher_register_instance(tmpl, inst); + if (err) + goto err_drop_spawn; + +out: + return err; + +err_drop_spawn: + crypto_drop_skcipher(spawn); +err_free_inst: kfree(inst); + goto out; } static struct crypto_template crypto_cts_tmpl = { .name = "cts", - .alloc = crypto_cts_alloc, - .free = crypto_cts_free, + .create = crypto_cts_create, .module = THIS_MODULE, }; diff --git a/crypto/dh.c b/crypto/dh.c new file mode 100644 index 000000000000..9d19360e7189 --- /dev/null +++ b/crypto/dh.c @@ -0,0 +1,189 @@ +/* Diffie-Hellman Key Agreement Method [RFC2631] + * + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#include <linux/module.h> +#include <crypto/internal/kpp.h> +#include <crypto/kpp.h> +#include <crypto/dh.h> +#include <linux/mpi.h> + +struct dh_ctx { + MPI p; + MPI g; + MPI xa; +}; + +static inline void dh_clear_params(struct dh_ctx *ctx) +{ + mpi_free(ctx->p); + mpi_free(ctx->g); + ctx->p = NULL; + ctx->g = NULL; +} + +static void dh_free_ctx(struct dh_ctx *ctx) +{ + dh_clear_params(ctx); + mpi_free(ctx->xa); + ctx->xa = NULL; +} + +/* + * If base is g we compute the public key + * ya = g^xa mod p; [RFC2631 sec 2.1.1] + * else if base is the counterpart public key we compute the shared secret + * ZZ = yb^xa mod p; [RFC2631 sec 2.1.1] + */ +static int _compute_val(const struct dh_ctx *ctx, MPI base, MPI val) +{ + /* val = base^xa mod p */ + return mpi_powm(val, base, ctx->xa, ctx->p); +} + +static inline struct dh_ctx *dh_get_ctx(struct crypto_kpp *tfm) +{ + return kpp_tfm_ctx(tfm); +} + +static int dh_check_params_length(unsigned int p_len) +{ + return (p_len < 1536) ? -EINVAL : 0; +} + +static int dh_set_params(struct dh_ctx *ctx, struct dh *params) +{ + if (unlikely(!params->p || !params->g)) + return -EINVAL; + + if (dh_check_params_length(params->p_size << 3)) + return -EINVAL; + + ctx->p = mpi_read_raw_data(params->p, params->p_size); + if (!ctx->p) + return -EINVAL; + + ctx->g = mpi_read_raw_data(params->g, params->g_size); + if (!ctx->g) { + mpi_free(ctx->p); + return -EINVAL; + } + + return 0; +} + +static int dh_set_secret(struct crypto_kpp *tfm, void *buf, unsigned int len) +{ + struct dh_ctx *ctx = dh_get_ctx(tfm); + struct dh params; + + if (crypto_dh_decode_key(buf, len, &params) < 0) + return -EINVAL; + + if (dh_set_params(ctx, &params) < 0) + return -EINVAL; + + ctx->xa = mpi_read_raw_data(params.key, params.key_size); + if (!ctx->xa) { + dh_clear_params(ctx); + return -EINVAL; + } + + return 0; +} + +static int dh_compute_value(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct dh_ctx *ctx = dh_get_ctx(tfm); + MPI base, val = mpi_alloc(0); + int ret = 0; + int sign; + + if (!val) + return -ENOMEM; + + if (unlikely(!ctx->xa)) { + ret = -EINVAL; + goto err_free_val; + } + + if (req->src) { + base = mpi_read_raw_from_sgl(req->src, req->src_len); + if (!base) { + ret = -EINVAL; + goto err_free_val; + } + } else { + base = ctx->g; + } + + ret = _compute_val(ctx, base, val); + if (ret) + goto err_free_base; + + ret = mpi_write_to_sgl(val, req->dst, req->dst_len, &sign); + if (ret) + goto err_free_base; + + if (sign < 0) + ret = -EBADMSG; +err_free_base: + if (req->src) + mpi_free(base); +err_free_val: + mpi_free(val); + return ret; +} + +static int dh_max_size(struct crypto_kpp *tfm) +{ + struct dh_ctx *ctx = dh_get_ctx(tfm); + + return mpi_get_size(ctx->p); +} + +static void dh_exit_tfm(struct crypto_kpp *tfm) +{ + struct dh_ctx *ctx = dh_get_ctx(tfm); + + dh_free_ctx(ctx); +} + +static struct kpp_alg dh = { + .set_secret = dh_set_secret, + .generate_public_key = dh_compute_value, + .compute_shared_secret = dh_compute_value, + .max_size = dh_max_size, + .exit = dh_exit_tfm, + .base = { + .cra_name = "dh", + .cra_driver_name = "dh-generic", + .cra_priority = 100, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct dh_ctx), + }, +}; + +static int dh_init(void) +{ + return crypto_register_kpp(&dh); +} + +static void dh_exit(void) +{ + crypto_unregister_kpp(&dh); +} + +module_init(dh_init); +module_exit(dh_exit); +MODULE_ALIAS_CRYPTO("dh"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("DH generic algorithm");
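For orientation, here is how a kernel-side caller would drive this new "dh" implementation through the generic kpp API: pack p, g and the private key with crypto_dh_encode_key(), hand the blob to crypto_kpp_set_secret(), and leave the request source empty to select base g. This is an illustrative sketch only; dh_demo_generate_public() and its parameters are invented for the example, and since dh-generic is synchronous no completion callback is set.

#include <crypto/dh.h>
#include <crypto/kpp.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

/* Illustrative helper, not part of the patch: compute ya = g^xa mod p.
 * public_key must be a linear (e.g. kmalloc'd) buffer of outlen bytes.
 */
static int dh_demo_generate_public(const struct dh *params,
				   u8 *public_key, unsigned int outlen)
{
	struct crypto_kpp *tfm;
	struct kpp_request *req = NULL;
	struct scatterlist dst;
	unsigned int secret_len;
	char *secret = NULL;
	int err;

	tfm = crypto_alloc_kpp("dh", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* Pack p, g and the private key into the set_secret() wire format. */
	err = -ENOMEM;
	secret_len = crypto_dh_key_len(params);
	secret = kmalloc(secret_len, GFP_KERNEL);
	if (!secret)
		goto out;

	err = crypto_dh_encode_key(secret, secret_len, params);
	if (err)
		goto out;
	err = crypto_kpp_set_secret(tfm, secret, secret_len);
	if (err)
		goto out;

	err = -ENOMEM;
	req = kpp_request_alloc(tfm, GFP_KERNEL);
	if (!req)
		goto out;

	/* A NULL source selects base g, i.e. public key generation. */
	sg_init_one(&dst, public_key, outlen);
	kpp_request_set_input(req, NULL, 0);
	kpp_request_set_output(req, &dst, outlen);
	err = crypto_kpp_generate_public_key(req);

out:
	kpp_request_free(req);
	kzfree(secret);
	crypto_free_kpp(tfm);
	return err;
}

Feeding the peer's public key in through kpp_request_set_input() and calling crypto_kpp_compute_shared_secret() instead yields ZZ = yb^xa mod p, matching the two cases _compute_val() distinguishes.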
diff --git a/crypto/dh_helper.c b/crypto/dh_helper.c new file mode 100644 index 000000000000..02db76b20d00 --- /dev/null +++ b/crypto/dh_helper.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/err.h> +#include <linux/string.h> +#include <crypto/dh.h> +#include <crypto/kpp.h> + +#define DH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + 3 * sizeof(int)) + +static inline u8 *dh_pack_data(void *dst, const void *src, size_t size) +{ + memcpy(dst, src, size); + return dst + size; +} + +static inline const u8 *dh_unpack_data(void *dst, const void *src, size_t size) +{ + memcpy(dst, src, size); + return src + size; +} + +static inline int dh_data_size(const struct dh *p) +{ + return p->key_size + p->p_size + p->g_size; +} + +int crypto_dh_key_len(const struct dh *p) +{ + return DH_KPP_SECRET_MIN_SIZE + dh_data_size(p); +} +EXPORT_SYMBOL_GPL(crypto_dh_key_len); + +int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params) +{ + u8 *ptr = buf; + struct kpp_secret secret = { + .type = CRYPTO_KPP_SECRET_TYPE_DH, + .len = len + }; + + if (unlikely(!buf)) + return -EINVAL; + + if (len != crypto_dh_key_len(params)) + return -EINVAL; + + ptr = dh_pack_data(ptr, &secret, sizeof(secret)); + ptr = dh_pack_data(ptr, &params->key_size, sizeof(params->key_size)); + ptr = dh_pack_data(ptr, &params->p_size, sizeof(params->p_size)); + ptr = dh_pack_data(ptr, &params->g_size, sizeof(params->g_size)); + ptr = dh_pack_data(ptr, params->key, params->key_size); + ptr = dh_pack_data(ptr, params->p, params->p_size); + dh_pack_data(ptr, params->g, params->g_size); + + return 0; +} +EXPORT_SYMBOL_GPL(crypto_dh_encode_key); + +int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params) +{ + const u8 *ptr = buf; + struct kpp_secret secret; + + if (unlikely(!buf || len < DH_KPP_SECRET_MIN_SIZE)) + return -EINVAL; + + ptr = dh_unpack_data(&secret, ptr, sizeof(secret)); + if (secret.type != CRYPTO_KPP_SECRET_TYPE_DH) + return -EINVAL; + + ptr = dh_unpack_data(&params->key_size, ptr, sizeof(params->key_size)); + ptr = dh_unpack_data(&params->p_size, ptr, sizeof(params->p_size)); + ptr = dh_unpack_data(&params->g_size, ptr, sizeof(params->g_size)); + if (secret.len != crypto_dh_key_len(params)) + return -EINVAL; + + /* Don't allocate memory. 
Set pointers to data within + * the given buffer + */ + params->key = (void *)ptr; + params->p = (void *)(ptr + params->key_size); + params->g = (void *)(ptr + params->key_size + params->p_size); + + return 0; +} +EXPORT_SYMBOL_GPL(crypto_dh_decode_key); diff --git a/crypto/drbg.c b/crypto/drbg.c index 0a3538f6cf22..f752da3a7c75 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -252,10 +252,16 @@ MODULE_ALIAS_CRYPTO("drbg_nopr_ctr_aes192"); MODULE_ALIAS_CRYPTO("drbg_pr_ctr_aes128"); MODULE_ALIAS_CRYPTO("drbg_nopr_ctr_aes128"); -static int drbg_kcapi_sym(struct drbg_state *drbg, const unsigned char *key, - unsigned char *outval, const struct drbg_string *in); +static void drbg_kcapi_symsetkey(struct drbg_state *drbg, + const unsigned char *key); +static int drbg_kcapi_sym(struct drbg_state *drbg, unsigned char *outval, + const struct drbg_string *in); static int drbg_init_sym_kernel(struct drbg_state *drbg); static int drbg_fini_sym_kernel(struct drbg_state *drbg); +static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, + u8 *inbuf, u32 inbuflen, + u8 *outbuf, u32 outlen); +#define DRBG_CTR_NULL_LEN 128 /* BCC function for CTR DRBG as defined in 10.4.3 */ static int drbg_ctr_bcc(struct drbg_state *drbg, @@ -270,6 +276,7 @@ static int drbg_ctr_bcc(struct drbg_state *drbg, drbg_string_fill(&data, out, drbg_blocklen(drbg)); /* 10.4.3 step 2 / 4 */ + drbg_kcapi_symsetkey(drbg, key); list_for_each_entry(curr, in, list) { const unsigned char *pos = curr->buf; size_t len = curr->len; @@ -278,7 +285,7 @@ static int drbg_ctr_bcc(struct drbg_state *drbg, /* 10.4.3 step 4.2 */ if (drbg_blocklen(drbg) == cnt) { cnt = 0; - ret = drbg_kcapi_sym(drbg, key, out, &data); + ret = drbg_kcapi_sym(drbg, out, &data); if (ret) return ret; } @@ -290,7 +297,7 @@ static int drbg_ctr_bcc(struct drbg_state *drbg, } /* 10.4.3 step 4.2 for last block */ if (cnt) - ret = drbg_kcapi_sym(drbg, key, out, &data); + ret = drbg_kcapi_sym(drbg, out, &data); return ret; } @@ -425,6 +432,7 @@ static int drbg_ctr_df(struct drbg_state *drbg, /* 10.4.2 step 12: overwriting of outval is implemented in next step */ /* 10.4.2 step 13 */ + drbg_kcapi_symsetkey(drbg, temp); while (generated_len < bytes_to_return) { short blocklen = 0; /* @@ -432,7 +440,7 @@ static int drbg_ctr_df(struct drbg_state *drbg, * implicit as the key is only drbg_blocklen in size based on * the implementation of the cipher function callback */ - ret = drbg_kcapi_sym(drbg, temp, X, &cipherin); + ret = drbg_kcapi_sym(drbg, X, &cipherin); if (ret) goto out; blocklen = (drbg_blocklen(drbg) < @@ -476,49 +484,47 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed, unsigned char *temp = drbg->scratchpad; unsigned char *df_data = drbg->scratchpad + drbg_statelen(drbg) + drbg_blocklen(drbg); - unsigned char *temp_p, *df_data_p; /* pointer to iterate over buffers */ - unsigned int len = 0; - struct drbg_string cipherin; if (3 > reseed) memset(df_data, 0, drbg_statelen(drbg)); - /* 10.2.1.3.2 step 2 and 10.2.1.4.2 step 2 */ - if (seed) { - ret = drbg_ctr_df(drbg, df_data, drbg_statelen(drbg), seed); + if (!reseed) { + /* + * The DRBG uses the CTR mode of the underlying AES cipher. The + * CTR mode increments the counter value after the AES operation + * but SP800-90A requires that the counter is incremented before + * the AES operation. Hence, we increment it at the time we set + * it by one. 
+ */ + crypto_inc(drbg->V, drbg_blocklen(drbg)); + + ret = crypto_skcipher_setkey(drbg->ctr_handle, drbg->C, + drbg_keylen(drbg)); if (ret) goto out; } - drbg_string_fill(&cipherin, drbg->V, drbg_blocklen(drbg)); - /* - * 10.2.1.3.2 steps 2 and 3 are already covered as the allocation - * zeroizes all memory during initialization - */ - while (len < (drbg_statelen(drbg))) { - /* 10.2.1.2 step 2.1 */ - crypto_inc(drbg->V, drbg_blocklen(drbg)); - /* - * 10.2.1.2 step 2.2 */ - ret = drbg_kcapi_sym(drbg, drbg->C, temp + len, &cipherin); + /* 10.2.1.3.2 step 2 and 10.2.1.4.2 step 2 */ + if (seed) { + ret = drbg_ctr_df(drbg, df_data, drbg_statelen(drbg), seed); if (ret) goto out; - /* 10.2.1.2 step 2.3 and 3 */ - len += drbg_blocklen(drbg); } - /* 10.2.1.2 step 4 */ - temp_p = temp; - df_data_p = df_data; - for (len = 0; len < drbg_statelen(drbg); len++) { - *temp_p ^= *df_data_p; - df_data_p++; temp_p++; - } + ret = drbg_kcapi_sym_ctr(drbg, df_data, drbg_statelen(drbg), + temp, drbg_statelen(drbg)); + if (ret) + return ret; /* 10.2.1.2 step 5 */ - memcpy(drbg->C, temp, drbg_keylen(drbg)); + ret = crypto_skcipher_setkey(drbg->ctr_handle, temp, + drbg_keylen(drbg)); + if (ret) + goto out; /* 10.2.1.2 step 6 */ memcpy(drbg->V, temp + drbg_keylen(drbg), drbg_blocklen(drbg)); + /* See above: increment counter by one to compensate timing of CTR op */ + crypto_inc(drbg->V, drbg_blocklen(drbg)); ret = 0; out: @@ -537,9 +543,8 @@ static int drbg_ctr_generate(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, struct list_head *addtl) { - int len = 0; - int ret = 0; - struct drbg_string data; + int ret; + int len = min_t(int, buflen, INT_MAX); /* 10.2.1.5.2 step 2 */ if (addtl && !list_empty(addtl)) { @@ -549,33 +554,16 @@ static int drbg_ctr_generate(struct drbg_state *drbg, } /* 10.2.1.5.2 step 4.1 */ - crypto_inc(drbg->V, drbg_blocklen(drbg)); - drbg_string_fill(&data, drbg->V, drbg_blocklen(drbg)); - while (len < buflen) { - int outlen = 0; - /* 10.2.1.5.2 step 4.2 */ - ret = drbg_kcapi_sym(drbg, drbg->C, drbg->scratchpad, &data); - if (ret) { - len = ret; - goto out; - } - outlen = (drbg_blocklen(drbg) < (buflen - len)) ? 
- drbg_blocklen(drbg) : (buflen - len); - /* 10.2.1.5.2 step 4.3 */ - memcpy(buf + len, drbg->scratchpad, outlen); - len += outlen; - /* 10.2.1.5.2 step 6 */ - if (len < buflen) - crypto_inc(drbg->V, drbg_blocklen(drbg)); - } + ret = drbg_kcapi_sym_ctr(drbg, drbg->ctr_null_value, DRBG_CTR_NULL_LEN, + buf, len); + if (ret) + return ret; /* 10.2.1.5.2 step 6 */ ret = drbg_ctr_update(drbg, NULL, 3); if (ret) len = ret; -out: - memset(drbg->scratchpad, 0, drbg_blocklen(drbg)); return len; } @@ -1145,11 +1133,11 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg) if (!drbg) return; kzfree(drbg->V); - drbg->V = NULL; + drbg->Vbuf = NULL; kzfree(drbg->C); - drbg->C = NULL; - kzfree(drbg->scratchpad); - drbg->scratchpad = NULL; + drbg->Cbuf = NULL; + kzfree(drbg->scratchpadbuf); + drbg->scratchpadbuf = NULL; drbg->reseed_ctr = 0; drbg->d_ops = NULL; drbg->core = NULL; @@ -1185,12 +1173,18 @@ static inline int drbg_alloc_state(struct drbg_state *drbg) goto err; } - drbg->V = kmalloc(drbg_statelen(drbg), GFP_KERNEL); - if (!drbg->V) - goto err; - drbg->C = kmalloc(drbg_statelen(drbg), GFP_KERNEL); - if (!drbg->C) + ret = drbg->d_ops->crypto_init(drbg); + if (ret < 0) goto err; + + drbg->Vbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL); + if (!drbg->Vbuf) + goto fini; + drbg->V = PTR_ALIGN(drbg->Vbuf, ret + 1); + drbg->Cbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL); + if (!drbg->Cbuf) + goto fini; + drbg->C = PTR_ALIGN(drbg->Cbuf, ret + 1); /* scratchpad is only generated for CTR and Hash */ if (drbg->core->flags & DRBG_HMAC) sb_size = 0; @@ -1204,13 +1198,16 @@ static inline int drbg_alloc_state(struct drbg_state *drbg) sb_size = drbg_statelen(drbg) + drbg_blocklen(drbg); if (0 < sb_size) { - drbg->scratchpad = kzalloc(sb_size, GFP_KERNEL); - if (!drbg->scratchpad) - goto err; + drbg->scratchpadbuf = kzalloc(sb_size + ret, GFP_KERNEL); + if (!drbg->scratchpadbuf) + goto fini; + drbg->scratchpad = PTR_ALIGN(drbg->scratchpadbuf, ret + 1); } return 0; +fini: + drbg->d_ops->crypto_fini(drbg); err: drbg_dealloc_state(drbg); return ret; @@ -1478,10 +1475,6 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, if (ret) goto unlock; - ret = -EFAULT; - if (drbg->d_ops->crypto_init(drbg)) - goto err; - ret = drbg_prepare_hrng(drbg); if (ret) goto free_everything; @@ -1505,8 +1498,6 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, mutex_unlock(&drbg->drbg_mutex); return ret; -err: - drbg_dealloc_state(drbg); unlock: mutex_unlock(&drbg->drbg_mutex); return ret; @@ -1591,7 +1582,8 @@ static int drbg_init_hash_kernel(struct drbg_state *drbg) sdesc->shash.tfm = tfm; sdesc->shash.flags = 0; drbg->priv_data = sdesc; - return 0; + + return crypto_shash_alignmask(tfm); } static int drbg_fini_hash_kernel(struct drbg_state *drbg) @@ -1627,10 +1619,45 @@ static int drbg_kcapi_hash(struct drbg_state *drbg, unsigned char *outval, #endif /* (CONFIG_CRYPTO_DRBG_HASH || CONFIG_CRYPTO_DRBG_HMAC) */ #ifdef CONFIG_CRYPTO_DRBG_CTR +static int drbg_fini_sym_kernel(struct drbg_state *drbg) +{ + struct crypto_cipher *tfm = + (struct crypto_cipher *)drbg->priv_data; + if (tfm) + crypto_free_cipher(tfm); + drbg->priv_data = NULL; + + if (drbg->ctr_handle) + crypto_free_skcipher(drbg->ctr_handle); + drbg->ctr_handle = NULL; + + if (drbg->ctr_req) + skcipher_request_free(drbg->ctr_req); + drbg->ctr_req = NULL; + + kfree(drbg->ctr_null_value_buf); + drbg->ctr_null_value = NULL; + + return 0; +} + +static void drbg_skcipher_cb(struct 
crypto_async_request *req, int error) +{ + struct drbg_state *drbg = req->data; + + if (error == -EINPROGRESS) + return; + drbg->ctr_async_err = error; + complete(&drbg->ctr_completion); +} + static int drbg_init_sym_kernel(struct drbg_state *drbg) { - int ret = 0; struct crypto_cipher *tfm; + struct crypto_skcipher *sk_tfm; + struct skcipher_request *req; + unsigned int alignmask; + char ctr_name[CRYPTO_MAX_ALG_NAME]; tfm = crypto_alloc_cipher(drbg->core->backend_cra_name, 0, 0); if (IS_ERR(tfm)) { @@ -1640,31 +1667,103 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg) } BUG_ON(drbg_blocklen(drbg) != crypto_cipher_blocksize(tfm)); drbg->priv_data = tfm; - return ret; + + if (snprintf(ctr_name, CRYPTO_MAX_ALG_NAME, "ctr(%s)", + drbg->core->backend_cra_name) >= CRYPTO_MAX_ALG_NAME) { + drbg_fini_sym_kernel(drbg); + return -EINVAL; + } + sk_tfm = crypto_alloc_skcipher(ctr_name, 0, 0); + if (IS_ERR(sk_tfm)) { + pr_info("DRBG: could not allocate CTR cipher TFM handle: %s\n", + ctr_name); + drbg_fini_sym_kernel(drbg); + return PTR_ERR(sk_tfm); + } + drbg->ctr_handle = sk_tfm; + + req = skcipher_request_alloc(sk_tfm, GFP_KERNEL); + if (!req) { + pr_info("DRBG: could not allocate request queue\n"); + drbg_fini_sym_kernel(drbg); + return -ENOMEM; + } + drbg->ctr_req = req; + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + drbg_skcipher_cb, drbg); + + alignmask = crypto_skcipher_alignmask(sk_tfm); + drbg->ctr_null_value_buf = kzalloc(DRBG_CTR_NULL_LEN + alignmask, + GFP_KERNEL); + if (!drbg->ctr_null_value_buf) { + drbg_fini_sym_kernel(drbg); + return -ENOMEM; + } + drbg->ctr_null_value = (u8 *)PTR_ALIGN(drbg->ctr_null_value_buf, + alignmask + 1); + + return alignmask; } -static int drbg_fini_sym_kernel(struct drbg_state *drbg) +static void drbg_kcapi_symsetkey(struct drbg_state *drbg, + const unsigned char *key) { struct crypto_cipher *tfm = (struct crypto_cipher *)drbg->priv_data; - if (tfm) - crypto_free_cipher(tfm); - drbg->priv_data = NULL; - return 0; + + crypto_cipher_setkey(tfm, key, (drbg_keylen(drbg))); } -static int drbg_kcapi_sym(struct drbg_state *drbg, const unsigned char *key, - unsigned char *outval, const struct drbg_string *in) +static int drbg_kcapi_sym(struct drbg_state *drbg, unsigned char *outval, + const struct drbg_string *in) { struct crypto_cipher *tfm = (struct crypto_cipher *)drbg->priv_data; - crypto_cipher_setkey(tfm, key, (drbg_keylen(drbg))); /* there is only component in *in */ BUG_ON(in->len < drbg_blocklen(drbg)); crypto_cipher_encrypt_one(tfm, outval, in->buf); return 0; } + +static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, + u8 *inbuf, u32 inlen, + u8 *outbuf, u32 outlen) +{ + struct scatterlist sg_in; + + sg_init_one(&sg_in, inbuf, inlen); + + while (outlen) { + u32 cryptlen = min_t(u32, inlen, outlen); + struct scatterlist sg_out; + int ret; + + sg_init_one(&sg_out, outbuf, cryptlen); + skcipher_request_set_crypt(drbg->ctr_req, &sg_in, &sg_out, + cryptlen, drbg->V); + ret = crypto_skcipher_encrypt(drbg->ctr_req); + switch (ret) { + case 0: + break; + case -EINPROGRESS: + case -EBUSY: + ret = wait_for_completion_interruptible( + &drbg->ctr_completion); + if (!ret && !drbg->ctr_async_err) { + reinit_completion(&drbg->ctr_completion); + break; + } + default: + return ret; + } + init_completion(&drbg->ctr_completion); + + outbuf += cryptlen; + outlen -= cryptlen; + } + + return 0; +} #endif /* CONFIG_CRYPTO_DRBG_CTR */ /***************************************************************
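One property worth spelling out, since the conversion above hinges on it: in CTR mode C = P XOR E_K(V), so encrypting the all-zero ctr_null_value buffer hands back the raw keystream E_K(V), E_K(V+1), ... that the DRBG emits. Below is a minimal standalone sketch of the same trick; the function name is invented for illustration, and it requests a synchronous ctr(aes) instance so it can skip the completion plumbing that drbg_kcapi_sym_ctr() needs for asynchronous implementations.

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

/* Illustrative only: pull AES-CTR keystream by encrypting zeroes.
 * out must be a linear buffer; iv holds the 16-byte counter block V.
 */
static int ctr_keystream_demo(const u8 *key, unsigned int keylen,
			      u8 *iv, u8 *out, unsigned int outlen)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req = NULL;
	struct scatterlist src, dst;
	u8 *zeroes;
	int err = -ENOMEM;

	/* type 0, mask CRYPTO_ALG_ASYNC: only synchronous implementations */
	tfm = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	zeroes = kzalloc(outlen, GFP_KERNEL);
	if (!zeroes)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req)
		goto out;

	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out;

	sg_init_one(&src, zeroes, outlen);
	sg_init_one(&dst, out, outlen);
	/* C = P ^ E_K(V); P == 0, so out receives E_K(V) directly. */
	skcipher_request_set_crypt(req, &src, &dst, outlen, iv);
	err = crypto_skcipher_encrypt(req);

out:
	skcipher_request_free(req);
	kfree(zeroes);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}

The cipher advances the counter in iv after each block, while SP800-90A expects the increment before the block operation; that is the off-by-one the patch compensates for by incrementing V up front in drbg_ctr_update().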
diff --git a/crypto/ecc.c b/crypto/ecc.c new file mode 100644 index 000000000000..414c78a9c214 --- /dev/null +++ b/crypto/ecc.c @@ -0,0 +1,1018 @@ +/* + * Copyright (c) 2013, Kenneth MacKay + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/swab.h> +#include <linux/fips.h> +#include <crypto/ecdh.h> + +#include "ecc.h" +#include "ecc_curve_defs.h" + +typedef struct { + u64 m_low; + u64 m_high; +} uint128_t; + +static inline const struct ecc_curve *ecc_get_curve(unsigned int curve_id) +{ + switch (curve_id) { + /* In FIPS mode only allow P256 and higher */ + case ECC_CURVE_NIST_P192: + return fips_enabled ? NULL : &nist_p192; + case ECC_CURVE_NIST_P256: + return &nist_p256; + default: + return NULL; + } +} + +static u64 *ecc_alloc_digits_space(unsigned int ndigits) +{ + size_t len = ndigits * sizeof(u64); + + if (!len) + return NULL; + + return kmalloc(len, GFP_KERNEL); +} + +static void ecc_free_digits_space(u64 *space) +{ + kzfree(space); +} + +static struct ecc_point *ecc_alloc_point(unsigned int ndigits) +{ + struct ecc_point *p = kmalloc(sizeof(*p), GFP_KERNEL); + + if (!p) + return NULL; + + p->x = ecc_alloc_digits_space(ndigits); + if (!p->x) + goto err_alloc_x; + + p->y = ecc_alloc_digits_space(ndigits); + if (!p->y) + goto err_alloc_y; + + p->ndigits = ndigits; + + return p; + +err_alloc_y: + ecc_free_digits_space(p->x); +err_alloc_x: + kfree(p); + return NULL; +} + +static void ecc_free_point(struct ecc_point *p) +{ + if (!p) + return; + + kzfree(p->x); + kzfree(p->y); + kzfree(p); +} + +static void vli_clear(u64 *vli, unsigned int ndigits) +{ + int i; + + for (i = 0; i < ndigits; i++) + vli[i] = 0; +} + +/* Returns true if vli == 0, false otherwise. */ +static bool vli_is_zero(const u64 *vli, unsigned int ndigits) +{ + int i; + + for (i = 0; i < ndigits; i++) { + if (vli[i]) + return false; + } + + return true; +} + +/* Returns nonzero if bit bit of vli is set. */ +static u64 vli_test_bit(const u64 *vli, unsigned int bit) +{ + return (vli[bit / 64] & ((u64)1 << (bit % 64))); +} + +/* Counts the number of 64-bit "digits" in vli. */ +static unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits) +{ + int i; + + /* Search from the end until we find a non-zero digit. + * We do it in reverse because we expect that most digits will + * be nonzero. 
+ */ + for (i = ndigits - 1; i >= 0 && vli[i] == 0; i--); + + return (i + 1); +} + +/* Counts the number of bits required for vli. */ +static unsigned int vli_num_bits(const u64 *vli, unsigned int ndigits) +{ + unsigned int i, num_digits; + u64 digit; + + num_digits = vli_num_digits(vli, ndigits); + if (num_digits == 0) + return 0; + + digit = vli[num_digits - 1]; + for (i = 0; digit; i++) + digit >>= 1; + + return ((num_digits - 1) * 64 + i); +} + +/* Sets dest = src. */ +static void vli_set(u64 *dest, const u64 *src, unsigned int ndigits) +{ + int i; + + for (i = 0; i < ndigits; i++) + dest[i] = src[i]; +} + +/* Returns sign of left - right. */ +static int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits) +{ + int i; + + for (i = ndigits - 1; i >= 0; i--) { + if (left[i] > right[i]) + return 1; + else if (left[i] < right[i]) + return -1; + } + + return 0; +} + +/* Computes result = in << c, returning carry. Can modify in place + * (if result == in). 0 < shift < 64. + */ +static u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift, + unsigned int ndigits) +{ + u64 carry = 0; + int i; + + for (i = 0; i < ndigits; i++) { + u64 temp = in[i]; + + result[i] = (temp << shift) | carry; + carry = temp >> (64 - shift); + } + + return carry; +} + +/* Computes vli = vli >> 1. */ +static void vli_rshift1(u64 *vli, unsigned int ndigits) +{ + u64 *end = vli; + u64 carry = 0; + + vli += ndigits; + + while (vli-- > end) { + u64 temp = *vli; + *vli = (temp >> 1) | carry; + carry = temp << 63; + } +} + +/* Computes result = left + right, returning carry. Can modify in place. */ +static u64 vli_add(u64 *result, const u64 *left, const u64 *right, + unsigned int ndigits) +{ + u64 carry = 0; + int i; + + for (i = 0; i < ndigits; i++) { + u64 sum; + + sum = left[i] + right[i] + carry; + if (sum != left[i]) + carry = (sum < left[i]); + + result[i] = sum; + } + + return carry; +} + +/* Computes result = left - right, returning borrow. Can modify in place. */ +static u64 vli_sub(u64 *result, const u64 *left, const u64 *right, + unsigned int ndigits) +{ + u64 borrow = 0; + int i; + + for (i = 0; i < ndigits; i++) { + u64 diff; + + diff = left[i] - right[i] - borrow; + if (diff != left[i]) + borrow = (diff > left[i]); + + result[i] = diff; + } + + return borrow; +} + +static uint128_t mul_64_64(u64 left, u64 right) +{ + u64 a0 = left & 0xffffffffull; + u64 a1 = left >> 32; + u64 b0 = right & 0xffffffffull; + u64 b1 = right >> 32; + u64 m0 = a0 * b0; + u64 m1 = a0 * b1; + u64 m2 = a1 * b0; + u64 m3 = a1 * b1; + uint128_t result; + + m2 += (m0 >> 32); + m2 += m1; + + /* Overflow */ + if (m2 < m1) + m3 += 0x100000000ull; + + result.m_low = (m0 & 0xffffffffull) | (m2 << 32); + result.m_high = m3 + (m2 >> 32); + + return result; +} + +static uint128_t add_128_128(uint128_t a, uint128_t b) +{ + uint128_t result; + + result.m_low = a.m_low + b.m_low; + result.m_high = a.m_high + b.m_high + (result.m_low < a.m_low); + + return result; +} + +static void vli_mult(u64 *result, const u64 *left, const u64 *right, + unsigned int ndigits) +{ + uint128_t r01 = { 0, 0 }; + u64 r2 = 0; + unsigned int i, k; + + /* Compute each digit of result in sequence, maintaining the + * carries. 
+ */ + for (k = 0; k < ndigits * 2 - 1; k++) { + unsigned int min; + + if (k < ndigits) + min = 0; + else + min = (k + 1) - ndigits; + + for (i = min; i <= k && i < ndigits; i++) { + uint128_t product; + + product = mul_64_64(left[i], right[k - i]); + + r01 = add_128_128(r01, product); + r2 += (r01.m_high < product.m_high); + } + + result[k] = r01.m_low; + r01.m_low = r01.m_high; + r01.m_high = r2; + r2 = 0; + } + + result[ndigits * 2 - 1] = r01.m_low; +} + +static void vli_square(u64 *result, const u64 *left, unsigned int ndigits) +{ + uint128_t r01 = { 0, 0 }; + u64 r2 = 0; + int i, k; + + for (k = 0; k < ndigits * 2 - 1; k++) { + unsigned int min; + + if (k < ndigits) + min = 0; + else + min = (k + 1) - ndigits; + + for (i = min; i <= k && i <= k - i; i++) { + uint128_t product; + + product = mul_64_64(left[i], left[k - i]); + + if (i < k - i) { + r2 += product.m_high >> 63; + product.m_high = (product.m_high << 1) | + (product.m_low >> 63); + product.m_low <<= 1; + } + + r01 = add_128_128(r01, product); + r2 += (r01.m_high < product.m_high); + } + + result[k] = r01.m_low; + r01.m_low = r01.m_high; + r01.m_high = r2; + r2 = 0; + } + + result[ndigits * 2 - 1] = r01.m_low; +} + +/* Computes result = (left + right) % mod. + * Assumes that left < mod and right < mod, result != mod. + */ +static void vli_mod_add(u64 *result, const u64 *left, const u64 *right, + const u64 *mod, unsigned int ndigits) +{ + u64 carry; + + carry = vli_add(result, left, right, ndigits); + + /* result > mod (result = mod + remainder), so subtract mod to + * get remainder. + */ + if (carry || vli_cmp(result, mod, ndigits) >= 0) + vli_sub(result, result, mod, ndigits); +} + +/* Computes result = (left - right) % mod. + * Assumes that left < mod and right < mod, result != mod. + */ +static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right, + const u64 *mod, unsigned int ndigits) +{ + u64 borrow = vli_sub(result, left, right, ndigits); + + /* In this case, p_result == -diff == (max int) - diff. + * Since -x % d == d - x, we can get the correct result from + * result + mod (with overflow). + */ + if (borrow) + vli_add(result, result, mod, ndigits); +} + +/* Computes p_result = p_product % curve_p. 
+ * See algorithm 5 and 6 from + * http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf + */ +static void vli_mmod_fast_192(u64 *result, const u64 *product, + const u64 *curve_prime, u64 *tmp) +{ + const unsigned int ndigits = 3; + int carry; + + vli_set(result, product, ndigits); + + vli_set(tmp, &product[3], ndigits); + carry = vli_add(result, result, tmp, ndigits); + + tmp[0] = 0; + tmp[1] = product[3]; + tmp[2] = product[4]; + carry += vli_add(result, result, tmp, ndigits); + + tmp[0] = tmp[1] = product[5]; + tmp[2] = 0; + carry += vli_add(result, result, tmp, ndigits); + + while (carry || vli_cmp(curve_prime, result, ndigits) != 1) + carry -= vli_sub(result, result, curve_prime, ndigits); +} + +/* Computes result = product % curve_prime + * from http://www.nsa.gov/ia/_files/nist-routines.pdf + */ +static void vli_mmod_fast_256(u64 *result, const u64 *product, + const u64 *curve_prime, u64 *tmp) +{ + int carry; + const unsigned int ndigits = 4; + + /* t */ + vli_set(result, product, ndigits); + + /* s1 */ + tmp[0] = 0; + tmp[1] = product[5] & 0xffffffff00000000ull; + tmp[2] = product[6]; + tmp[3] = product[7]; + carry = vli_lshift(tmp, tmp, 1, ndigits); + carry += vli_add(result, result, tmp, ndigits); + + /* s2 */ + tmp[1] = product[6] << 32; + tmp[2] = (product[6] >> 32) | (product[7] << 32); + tmp[3] = product[7] >> 32; + carry += vli_lshift(tmp, tmp, 1, ndigits); + carry += vli_add(result, result, tmp, ndigits); + + /* s3 */ + tmp[0] = product[4]; + tmp[1] = product[5] & 0xffffffff; + tmp[2] = 0; + tmp[3] = product[7]; + carry += vli_add(result, result, tmp, ndigits); + + /* s4 */ + tmp[0] = (product[4] >> 32) | (product[5] << 32); + tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull); + tmp[2] = product[7]; + tmp[3] = (product[6] >> 32) | (product[4] << 32); + carry += vli_add(result, result, tmp, ndigits); + + /* d1 */ + tmp[0] = (product[5] >> 32) | (product[6] << 32); + tmp[1] = (product[6] >> 32); + tmp[2] = 0; + tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32); + carry -= vli_sub(result, result, tmp, ndigits); + + /* d2 */ + tmp[0] = product[6]; + tmp[1] = product[7]; + tmp[2] = 0; + tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull); + carry -= vli_sub(result, result, tmp, ndigits); + + /* d3 */ + tmp[0] = (product[6] >> 32) | (product[7] << 32); + tmp[1] = (product[7] >> 32) | (product[4] << 32); + tmp[2] = (product[4] >> 32) | (product[5] << 32); + tmp[3] = (product[6] << 32); + carry -= vli_sub(result, result, tmp, ndigits); + + /* d4 */ + tmp[0] = product[7]; + tmp[1] = product[4] & 0xffffffff00000000ull; + tmp[2] = product[5]; + tmp[3] = product[6] & 0xffffffff00000000ull; + carry -= vli_sub(result, result, tmp, ndigits); + + if (carry < 0) { + do { + carry += vli_add(result, result, curve_prime, ndigits); + } while (carry < 0); + } else { + while (carry || vli_cmp(curve_prime, result, ndigits) != 1) + carry -= vli_sub(result, result, curve_prime, ndigits); + } +} + +/* Computes result = product % curve_prime + * from http://www.nsa.gov/ia/_files/nist-routines.pdf +*/ +static bool vli_mmod_fast(u64 *result, u64 *product, + const u64 *curve_prime, unsigned int ndigits) +{ + u64 tmp[2 * ndigits]; + + switch (ndigits) { + case 3: + vli_mmod_fast_192(result, product, curve_prime, tmp); + break; + case 4: + vli_mmod_fast_256(result, product, curve_prime, tmp); + break; + default: + pr_err("unsupports digits size!\n"); + return false; + } + + return true; +} + +/* Computes result = (left * right) % curve_prime. 
*/ +static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right, + const u64 *curve_prime, unsigned int ndigits) +{ + u64 product[2 * ndigits]; + + vli_mult(product, left, right, ndigits); + vli_mmod_fast(result, product, curve_prime, ndigits); +} + +/* Computes result = left^2 % curve_prime. */ +static void vli_mod_square_fast(u64 *result, const u64 *left, + const u64 *curve_prime, unsigned int ndigits) +{ + u64 product[2 * ndigits]; + + vli_square(product, left, ndigits); + vli_mmod_fast(result, product, curve_prime, ndigits); +} + +#define EVEN(vli) (!(vli[0] & 1)) +/* Computes result = (1 / p_input) % mod. All VLIs are the same size. + * See "From Euclid's GCD to Montgomery Multiplication to the Great Divide" + * https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf + */ +static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod, + unsigned int ndigits) +{ + u64 a[ndigits], b[ndigits]; + u64 u[ndigits], v[ndigits]; + u64 carry; + int cmp_result; + + if (vli_is_zero(input, ndigits)) { + vli_clear(result, ndigits); + return; + } + + vli_set(a, input, ndigits); + vli_set(b, mod, ndigits); + vli_clear(u, ndigits); + u[0] = 1; + vli_clear(v, ndigits); + + while ((cmp_result = vli_cmp(a, b, ndigits)) != 0) { + carry = 0; + + if (EVEN(a)) { + vli_rshift1(a, ndigits); + + if (!EVEN(u)) + carry = vli_add(u, u, mod, ndigits); + + vli_rshift1(u, ndigits); + if (carry) + u[ndigits - 1] |= 0x8000000000000000ull; + } else if (EVEN(b)) { + vli_rshift1(b, ndigits); + + if (!EVEN(v)) + carry = vli_add(v, v, mod, ndigits); + + vli_rshift1(v, ndigits); + if (carry) + v[ndigits - 1] |= 0x8000000000000000ull; + } else if (cmp_result > 0) { + vli_sub(a, a, b, ndigits); + vli_rshift1(a, ndigits); + + if (vli_cmp(u, v, ndigits) < 0) + vli_add(u, u, mod, ndigits); + + vli_sub(u, u, v, ndigits); + if (!EVEN(u)) + carry = vli_add(u, u, mod, ndigits); + + vli_rshift1(u, ndigits); + if (carry) + u[ndigits - 1] |= 0x8000000000000000ull; + } else { + vli_sub(b, b, a, ndigits); + vli_rshift1(b, ndigits); + + if (vli_cmp(v, u, ndigits) < 0) + vli_add(v, v, mod, ndigits); + + vli_sub(v, v, u, ndigits); + if (!EVEN(v)) + carry = vli_add(v, v, mod, ndigits); + + vli_rshift1(v, ndigits); + if (carry) + v[ndigits - 1] |= 0x8000000000000000ull; + } + } + + vli_set(result, u, ndigits); +} + +/* ------ Point operations ------ */ + +/* Returns true if p_point is the point at infinity, false otherwise. */ +static bool ecc_point_is_zero(const struct ecc_point *point) +{ + return (vli_is_zero(point->x, point->ndigits) && + vli_is_zero(point->y, point->ndigits)); +} + +/* Point multiplication algorithm using Montgomery's ladder with co-Z + * coordinates. 
From http://eprint.iacr.org/2011/338.pdf + */ + +/* Double in place */ +static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, + u64 *curve_prime, unsigned int ndigits) +{ + /* t1 = x, t2 = y, t3 = z */ + u64 t4[ndigits]; + u64 t5[ndigits]; + + if (vli_is_zero(z1, ndigits)) + return; + + /* t4 = y1^2 */ + vli_mod_square_fast(t4, y1, curve_prime, ndigits); + /* t5 = x1*y1^2 = A */ + vli_mod_mult_fast(t5, x1, t4, curve_prime, ndigits); + /* t4 = y1^4 */ + vli_mod_square_fast(t4, t4, curve_prime, ndigits); + /* t2 = y1*z1 = z3 */ + vli_mod_mult_fast(y1, y1, z1, curve_prime, ndigits); + /* t3 = z1^2 */ + vli_mod_square_fast(z1, z1, curve_prime, ndigits); + + /* t1 = x1 + z1^2 */ + vli_mod_add(x1, x1, z1, curve_prime, ndigits); + /* t3 = 2*z1^2 */ + vli_mod_add(z1, z1, z1, curve_prime, ndigits); + /* t3 = x1 - z1^2 */ + vli_mod_sub(z1, x1, z1, curve_prime, ndigits); + /* t1 = x1^2 - z1^4 */ + vli_mod_mult_fast(x1, x1, z1, curve_prime, ndigits); + + /* t3 = 2*(x1^2 - z1^4) */ + vli_mod_add(z1, x1, x1, curve_prime, ndigits); + /* t1 = 3*(x1^2 - z1^4) */ + vli_mod_add(x1, x1, z1, curve_prime, ndigits); + if (vli_test_bit(x1, 0)) { + u64 carry = vli_add(x1, x1, curve_prime, ndigits); + + vli_rshift1(x1, ndigits); + x1[ndigits - 1] |= carry << 63; + } else { + vli_rshift1(x1, ndigits); + } + /* t1 = 3/2*(x1^2 - z1^4) = B */ + + /* t3 = B^2 */ + vli_mod_square_fast(z1, x1, curve_prime, ndigits); + /* t3 = B^2 - A */ + vli_mod_sub(z1, z1, t5, curve_prime, ndigits); + /* t3 = B^2 - 2A = x3 */ + vli_mod_sub(z1, z1, t5, curve_prime, ndigits); + /* t5 = A - x3 */ + vli_mod_sub(t5, t5, z1, curve_prime, ndigits); + /* t1 = B * (A - x3) */ + vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits); + /* t4 = B * (A - x3) - y1^4 = y3 */ + vli_mod_sub(t4, x1, t4, curve_prime, ndigits); + + vli_set(x1, z1, ndigits); + vli_set(z1, y1, ndigits); + vli_set(y1, t4, ndigits); +} + +/* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */ +static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime, + unsigned int ndigits) +{ + u64 t1[ndigits]; + + vli_mod_square_fast(t1, z, curve_prime, ndigits); /* z^2 */ + vli_mod_mult_fast(x1, x1, t1, curve_prime, ndigits); /* x1 * z^2 */ + vli_mod_mult_fast(t1, t1, z, curve_prime, ndigits); /* z^3 */ + vli_mod_mult_fast(y1, y1, t1, curve_prime, ndigits); /* y1 * z^3 */ +} + +/* P = (x1, y1) => 2P, (x2, y2) => P' */ +static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2, + u64 *p_initial_z, u64 *curve_prime, + unsigned int ndigits) +{ + u64 z[ndigits]; + + vli_set(x2, x1, ndigits); + vli_set(y2, y1, ndigits); + + vli_clear(z, ndigits); + z[0] = 1; + + if (p_initial_z) + vli_set(z, p_initial_z, ndigits); + + apply_z(x1, y1, z, curve_prime, ndigits); + + ecc_point_double_jacobian(x1, y1, z, curve_prime, ndigits); + + apply_z(x2, y2, z, curve_prime, ndigits); +} + +/* Input P = (x1, y1, Z), Q = (x2, y2, Z) + * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3) + * or P => P', Q => P + Q + */ +static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, + unsigned int ndigits) +{ + /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ + u64 t5[ndigits]; + + /* t5 = x2 - x1 */ + vli_mod_sub(t5, x2, x1, curve_prime, ndigits); + /* t5 = (x2 - x1)^2 = A */ + vli_mod_square_fast(t5, t5, curve_prime, ndigits); + /* t1 = x1*A = B */ + vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits); + /* t3 = x2*A = C */ + vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits); + /* t4 = y2 - y1 */ + vli_mod_sub(y2, y2, y1, curve_prime, ndigits); + /* t5 = (y2 - y1)^2 = D */ + 
vli_mod_square_fast(t5, y2, curve_prime, ndigits); + + /* t5 = D - B */ + vli_mod_sub(t5, t5, x1, curve_prime, ndigits); + /* t5 = D - B - C = x3 */ + vli_mod_sub(t5, t5, x2, curve_prime, ndigits); + /* t3 = C - B */ + vli_mod_sub(x2, x2, x1, curve_prime, ndigits); + /* t2 = y1*(C - B) */ + vli_mod_mult_fast(y1, y1, x2, curve_prime, ndigits); + /* t3 = B - x3 */ + vli_mod_sub(x2, x1, t5, curve_prime, ndigits); + /* t4 = (y2 - y1)*(B - x3) */ + vli_mod_mult_fast(y2, y2, x2, curve_prime, ndigits); + /* t4 = y3 */ + vli_mod_sub(y2, y2, y1, curve_prime, ndigits); + + vli_set(x2, t5, ndigits); +} + +/* Input P = (x1, y1, Z), Q = (x2, y2, Z) + * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3) + * or P => P - Q, Q => P + Q + */ +static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, + unsigned int ndigits) +{ + /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ + u64 t5[ndigits]; + u64 t6[ndigits]; + u64 t7[ndigits]; + + /* t5 = x2 - x1 */ + vli_mod_sub(t5, x2, x1, curve_prime, ndigits); + /* t5 = (x2 - x1)^2 = A */ + vli_mod_square_fast(t5, t5, curve_prime, ndigits); + /* t1 = x1*A = B */ + vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits); + /* t3 = x2*A = C */ + vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits); + /* t4 = y2 + y1 */ + vli_mod_add(t5, y2, y1, curve_prime, ndigits); + /* t4 = y2 - y1 */ + vli_mod_sub(y2, y2, y1, curve_prime, ndigits); + + /* t6 = C - B */ + vli_mod_sub(t6, x2, x1, curve_prime, ndigits); + /* t2 = y1 * (C - B) */ + vli_mod_mult_fast(y1, y1, t6, curve_prime, ndigits); + /* t6 = B + C */ + vli_mod_add(t6, x1, x2, curve_prime, ndigits); + /* t3 = (y2 - y1)^2 */ + vli_mod_square_fast(x2, y2, curve_prime, ndigits); + /* t3 = x3 */ + vli_mod_sub(x2, x2, t6, curve_prime, ndigits); + + /* t7 = B - x3 */ + vli_mod_sub(t7, x1, x2, curve_prime, ndigits); + /* t4 = (y2 - y1)*(B - x3) */ + vli_mod_mult_fast(y2, y2, t7, curve_prime, ndigits); + /* t4 = y3 */ + vli_mod_sub(y2, y2, y1, curve_prime, ndigits); + + /* t7 = (y2 + y1)^2 = F */ + vli_mod_square_fast(t7, t5, curve_prime, ndigits); + /* t7 = x3' */ + vli_mod_sub(t7, t7, t6, curve_prime, ndigits); + /* t6 = x3' - B */ + vli_mod_sub(t6, t7, x1, curve_prime, ndigits); + /* t6 = (y2 + y1)*(x3' - B) */ + vli_mod_mult_fast(t6, t6, t5, curve_prime, ndigits); + /* t2 = y3' */ + vli_mod_sub(y1, t6, y1, curve_prime, ndigits); + + vli_set(x1, t7, ndigits); +} + +static void ecc_point_mult(struct ecc_point *result, + const struct ecc_point *point, const u64 *scalar, + u64 *initial_z, u64 *curve_prime, + unsigned int ndigits) +{ + /* R0 and R1 */ + u64 rx[2][ndigits]; + u64 ry[2][ndigits]; + u64 z[ndigits]; + int i, nb; + int num_bits = vli_num_bits(scalar, ndigits); + + vli_set(rx[1], point->x, ndigits); + vli_set(ry[1], point->y, ndigits); + + xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, curve_prime, + ndigits); + + for (i = num_bits - 2; i > 0; i--) { + nb = !vli_test_bit(scalar, i); + xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime, + ndigits); + xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime, + ndigits); + } + + nb = !vli_test_bit(scalar, 0); + xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime, + ndigits); + + /* Find final 1/Z value. 
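The loop just above is a Montgomery ladder: rx/ry[0] and rx/ry[1] always hold mP and (m+1)P for the scalar prefix m consumed so far, and every bit costs the same xycz_add_c()/xycz_add() pair whatever its value. The loop starts at num_bits - 2 because xycz_initial_double() already consumed the top bit, and the last bit is handled specially together with the 1/Z recovery below. A toy sketch of the same schedule on plain integers, with addition standing in for the group law (wraps mod 2^64, which is harmless for the illustration):

	#include <stdint.h>

	/* Toy ladder: computes k*g with the per-bit schedule of
	 * ecc_point_mult(). Invariant at every step: r1 - r0 == g. */
	static uint64_t toy_ladder(uint64_t k, uint64_t g)
	{
		uint64_t r0 = 0, r1 = g;	/* r0 = m*g, r1 = (m+1)*g */
		int i;

		for (i = 63; i >= 0; i--) {
			if ((k >> i) & 1) {	/* m' = 2m + 1 */
				r0 += r1;	/* (2m+1)*g */
				r1 += r1;	/* (2m+2)*g */
			} else {		/* m' = 2m */
				r1 += r0;	/* (2m+1)*g */
				r0 += r0;	/* (2m)*g   */
			}
		}
		return r0;			/* k*g */
	}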
*/ + /* X1 - X0 */ + vli_mod_sub(z, rx[1], rx[0], curve_prime, ndigits); + /* Yb * (X1 - X0) */ + vli_mod_mult_fast(z, z, ry[1 - nb], curve_prime, ndigits); + /* xP * Yb * (X1 - X0) */ + vli_mod_mult_fast(z, z, point->x, curve_prime, ndigits); + + /* 1 / (xP * Yb * (X1 - X0)) */ + vli_mod_inv(z, z, curve_prime, point->ndigits); + + /* yP / (xP * Yb * (X1 - X0)) */ + vli_mod_mult_fast(z, z, point->y, curve_prime, ndigits); + /* Xb * yP / (xP * Yb * (X1 - X0)) */ + vli_mod_mult_fast(z, z, rx[1 - nb], curve_prime, ndigits); + /* End 1/Z calculation */ + + xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime, ndigits); + + apply_z(rx[0], ry[0], z, curve_prime, ndigits); + + vli_set(result->x, rx[0], ndigits); + vli_set(result->y, ry[0], ndigits); +} + +static inline void ecc_swap_digits(const u64 *in, u64 *out, + unsigned int ndigits) +{ + int i; + + for (i = 0; i < ndigits; i++) + out[i] = __swab64(in[ndigits - 1 - i]); +} + +int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, + const u8 *private_key, unsigned int private_key_len) +{ + int nbytes; + const struct ecc_curve *curve = ecc_get_curve(curve_id); + + if (!private_key) + return -EINVAL; + + nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + + if (private_key_len != nbytes) + return -EINVAL; + + if (vli_is_zero((const u64 *)&private_key[0], ndigits)) + return -EINVAL; + + /* Make sure the private key is in the range [1, n-1]. */ + if (vli_cmp(curve->n, (const u64 *)&private_key[0], ndigits) != 1) + return -EINVAL; + + return 0; +} + +int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits, + const u8 *private_key, unsigned int private_key_len, + u8 *public_key, unsigned int public_key_len) +{ + int ret = 0; + struct ecc_point *pk; + u64 priv[ndigits]; + unsigned int nbytes; + const struct ecc_curve *curve = ecc_get_curve(curve_id); + + if (!private_key || !curve) { + ret = -EINVAL; + goto out; + } + + ecc_swap_digits((const u64 *)private_key, priv, ndigits); + + pk = ecc_alloc_point(ndigits); + if (!pk) { + ret = -ENOMEM; + goto out; + } + + ecc_point_mult(pk, &curve->g, priv, NULL, curve->p, ndigits); + if (ecc_point_is_zero(pk)) { + ret = -EAGAIN; + goto err_free_point; + } + + nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + ecc_swap_digits(pk->x, (u64 *)public_key, ndigits); + ecc_swap_digits(pk->y, (u64 *)&public_key[nbytes], ndigits); + +err_free_point: + ecc_free_point(pk); +out: + return ret; +} + +int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, + const u8 *private_key, unsigned int private_key_len, + const u8 *public_key, unsigned int public_key_len, + u8 *secret, unsigned int secret_len) +{ + int ret = 0; + struct ecc_point *product, *pk; + u64 priv[ndigits]; + u64 rand_z[ndigits]; + unsigned int nbytes; + const struct ecc_curve *curve = ecc_get_curve(curve_id); + + if (!private_key || !public_key || !curve) { + ret = -EINVAL; + goto out; + } + + nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + + get_random_bytes(rand_z, nbytes); + + pk = ecc_alloc_point(ndigits); + if (!pk) { + ret = -ENOMEM; + goto out; + } + + product = ecc_alloc_point(ndigits); + if (!product) { + ret = -ENOMEM; + goto err_alloc_product; + } + + ecc_swap_digits((const u64 *)public_key, pk->x, ndigits); + ecc_swap_digits((const u64 *)&public_key[nbytes], pk->y, ndigits); + ecc_swap_digits((const u64 *)private_key, priv, ndigits); + + ecc_point_mult(product, pk, priv, rand_z, curve->p, ndigits); + + ecc_swap_digits(product->x, (u64 *)secret, ndigits); + + if (ecc_point_is_zero(product)) + ret = 
-EFAULT; + + ecc_free_point(product); +err_alloc_product: + ecc_free_point(pk); +out: + return ret; +} diff --git a/crypto/ecc.h b/crypto/ecc.h new file mode 100644 index 000000000000..663d598c7406 --- /dev/null +++ b/crypto/ecc.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2013, Kenneth MacKay + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _CRYPTO_ECC_H +#define _CRYPTO_ECC_H + +#define ECC_MAX_DIGITS 4 /* 256 */ + +#define ECC_DIGITS_TO_BYTES_SHIFT 3 + +/** + * ecc_is_key_valid() - Validate a given ECDH private key + * + * @curve_id: id representing the curve to use + * @ndigits: curve number of digits + * @private_key: private key to be used for the given curve + * @private_key_len: private key len + * + * Returns 0 if the key is acceptable, a negative value otherwise + */ +int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, + const u8 *private_key, unsigned int private_key_len); + +/** + * ecdh_make_pub_key() - Compute an ECC public key + * + * @curve_id: id representing the curve to use + * @private_key: pregenerated private key for the given curve + * @private_key_len: length of private_key + * @public_key: buffer for storing the public key generated + * @public_key_len: length of the public_key buffer + * + * Returns 0 if the public key was generated successfully, a negative value + * if an error occurred. + */ +int ecdh_make_pub_key(const unsigned int curve_id, unsigned int ndigits, + const u8 *private_key, unsigned int private_key_len, + u8 *public_key, unsigned int public_key_len); + +/** + * crypto_ecdh_shared_secret() - Compute a shared secret + * + * @curve_id: id representing the curve to use + * @private_key: private key of part A + * @private_key_len: length of private_key + * @public_key: public key of counterpart B + * @public_key_len: length of public_key + * @secret: buffer for storing the calculated shared secret + * @secret_len: length of the secret buffer + * + * Note: It is recommended that you hash the result of crypto_ecdh_shared_secret + * before using it for symmetric encryption or HMAC. + * + * Returns 0 if the shared secret was generated successfully, a negative value + * if an error occurred. 
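Taken together, the three helpers this header declares implement textbook ECDH over the NIST curves. A hedged usage sketch (error handling elided; priv_a, pub_b and the P-256 sizes -- 4 digits, 32 bytes per coordinate -- are illustrative, and the ECC_CURVE_NIST_P256 id comes from the new crypto/ecdh.h):

	u8 pub_a[2 * 32], secret_a[32];

	if (ecc_is_key_valid(ECC_CURVE_NIST_P256, 4, priv_a, 32) < 0)
		return -EINVAL;

	/* pub_a = priv_a * G, sent to the peer */
	ecdh_make_pub_key(ECC_CURVE_NIST_P256, 4, priv_a, 32,
			  pub_a, sizeof(pub_a));

	/* secret_a = priv_a * pub_b = priv_a * priv_b * G, so both ends
	 * derive the same bytes; hash them before use, per the note above. */
	crypto_ecdh_shared_secret(ECC_CURVE_NIST_P256, 4, priv_a, 32,
				  pub_b, 2 * 32, secret_a, sizeof(secret_a));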
+ */ +int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, + const u8 *private_key, unsigned int private_key_len, + const u8 *public_key, unsigned int public_key_len, + u8 *secret, unsigned int secret_len); +#endif diff --git a/crypto/ecc_curve_defs.h b/crypto/ecc_curve_defs.h new file mode 100644 index 000000000000..03ae5f714028 --- /dev/null +++ b/crypto/ecc_curve_defs.h @@ -0,0 +1,57 @@ +#ifndef _CRYPTO_ECC_CURVE_DEFS_H +#define _CRYPTO_ECC_CURVE_DEFS_H + +struct ecc_point { + u64 *x; + u64 *y; + u8 ndigits; +}; + +struct ecc_curve { + char *name; + struct ecc_point g; + u64 *p; + u64 *n; +}; + +/* NIST P-192 */ +static u64 nist_p192_g_x[] = { 0xF4FF0AFD82FF1012ull, 0x7CBF20EB43A18800ull, + 0x188DA80EB03090F6ull }; +static u64 nist_p192_g_y[] = { 0x73F977A11E794811ull, 0x631011ED6B24CDD5ull, + 0x07192B95FFC8DA78ull }; +static u64 nist_p192_p[] = { 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFEull, + 0xFFFFFFFFFFFFFFFFull }; +static u64 nist_p192_n[] = { 0x146BC9B1B4D22831ull, 0xFFFFFFFF99DEF836ull, + 0xFFFFFFFFFFFFFFFFull }; +static struct ecc_curve nist_p192 = { + .name = "nist_192", + .g = { + .x = nist_p192_g_x, + .y = nist_p192_g_y, + .ndigits = 3, + }, + .p = nist_p192_p, + .n = nist_p192_n +}; + +/* NIST P-256 */ +static u64 nist_p256_g_x[] = { 0xF4A13945D898C296ull, 0x77037D812DEB33A0ull, + 0xF8BCE6E563A440F2ull, 0x6B17D1F2E12C4247ull }; +static u64 nist_p256_g_y[] = { 0xCBB6406837BF51F5ull, 0x2BCE33576B315ECEull, + 0x8EE7EB4A7C0F9E16ull, 0x4FE342E2FE1A7F9Bull }; +static u64 nist_p256_p[] = { 0xFFFFFFFFFFFFFFFFull, 0x00000000FFFFFFFFull, + 0x0000000000000000ull, 0xFFFFFFFF00000001ull }; +static u64 nist_p256_n[] = { 0xF3B9CAC2FC632551ull, 0xBCE6FAADA7179E84ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF00000000ull }; +static struct ecc_curve nist_p256 = { + .name = "nist_256", + .g = { + .x = nist_p256_g_x, + .y = nist_p256_g_y, + .ndigits = 4, + }, + .p = nist_p256_p, + .n = nist_p256_n +}; + +#endif diff --git a/crypto/ecdh.c b/crypto/ecdh.c new file mode 100644 index 000000000000..3de289806d67 --- /dev/null +++ b/crypto/ecdh.c @@ -0,0 +1,151 @@ +/* ECDH key-agreement protocol + * + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
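The digit arrays above store the least significant u64 first. As a cross-check, nist_p256_p read most-significant-first is 0xFFFFFFFF00000001 0000000000000000 00000000FFFFFFFF FFFFFFFFFFFFFFFF, i.e. the NIST P-256 prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and nist_p192_p encodes p = 2^192 - 2^64 - 1, the prime whose folding identity vli_mmod_fast_192() relies on.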
+ */ + +#include +#include +#include +#include +#include +#include "ecc.h" + +struct ecdh_ctx { + unsigned int curve_id; + unsigned int ndigits; + u64 private_key[ECC_MAX_DIGITS]; + u64 public_key[2 * ECC_MAX_DIGITS]; + u64 shared_secret[ECC_MAX_DIGITS]; +}; + +static inline struct ecdh_ctx *ecdh_get_ctx(struct crypto_kpp *tfm) +{ + return kpp_tfm_ctx(tfm); +} + +static unsigned int ecdh_supported_curve(unsigned int curve_id) +{ + switch (curve_id) { + case ECC_CURVE_NIST_P192: return 3; + case ECC_CURVE_NIST_P256: return 4; + default: return 0; + } +} + +static int ecdh_set_secret(struct crypto_kpp *tfm, void *buf, unsigned int len) +{ + struct ecdh_ctx *ctx = ecdh_get_ctx(tfm); + struct ecdh params; + unsigned int ndigits; + + if (crypto_ecdh_decode_key(buf, len, &params) < 0) + return -EINVAL; + + ndigits = ecdh_supported_curve(params.curve_id); + if (!ndigits) + return -EINVAL; + + ctx->curve_id = params.curve_id; + ctx->ndigits = ndigits; + + if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits, + (const u8 *)params.key, params.key_size) < 0) + return -EINVAL; + + memcpy(ctx->private_key, params.key, params.key_size); + + return 0; +} + +static int ecdh_compute_value(struct kpp_request *req) +{ + int ret = 0; + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct ecdh_ctx *ctx = ecdh_get_ctx(tfm); + size_t copied, nbytes; + void *buf; + + nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + + if (req->src) { + copied = sg_copy_to_buffer(req->src, 1, ctx->public_key, + 2 * nbytes); + if (copied != 2 * nbytes) + return -EINVAL; + + ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits, + (const u8 *)ctx->private_key, nbytes, + (const u8 *)ctx->public_key, 2 * nbytes, + (u8 *)ctx->shared_secret, nbytes); + + buf = ctx->shared_secret; + } else { + ret = ecdh_make_pub_key(ctx->curve_id, ctx->ndigits, + (const u8 *)ctx->private_key, nbytes, + (u8 *)ctx->public_key, + sizeof(ctx->public_key)); + buf = ctx->public_key; + /* Public part is a point thus it has both coordinates */ + nbytes *= 2; + } + + if (ret < 0) + return ret; + + copied = sg_copy_from_buffer(req->dst, 1, buf, nbytes); + if (copied != nbytes) + return -EINVAL; + + return ret; +} + +static int ecdh_max_size(struct crypto_kpp *tfm) +{ + struct ecdh_ctx *ctx = ecdh_get_ctx(tfm); + int nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + + /* Public key is made of two coordinates */ + return 2 * nbytes; +} + +static void no_exit_tfm(struct crypto_kpp *tfm) +{ + return; +} + +static struct kpp_alg ecdh = { + .set_secret = ecdh_set_secret, + .generate_public_key = ecdh_compute_value, + .compute_shared_secret = ecdh_compute_value, + .max_size = ecdh_max_size, + .exit = no_exit_tfm, + .base = { + .cra_name = "ecdh", + .cra_driver_name = "ecdh-generic", + .cra_priority = 100, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct ecdh_ctx), + }, +}; + +static int ecdh_init(void) +{ + return crypto_register_kpp(&ecdh); +} + +static void ecdh_exit(void) +{ + crypto_unregister_kpp(&ecdh); +} + +module_init(ecdh_init); +module_exit(ecdh_exit); +MODULE_ALIAS_CRYPTO("ecdh"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ECDH generic algorithm"); diff --git a/crypto/ecdh_helper.c b/crypto/ecdh_helper.c new file mode 100644 index 000000000000..3cd8a2414e60 --- /dev/null +++ b/crypto/ecdh_helper.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as 
published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include + +#define ECDH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + 2 * sizeof(short)) + +static inline u8 *ecdh_pack_data(void *dst, const void *src, size_t sz) +{ + memcpy(dst, src, sz); + return dst + sz; +} + +static inline const u8 *ecdh_unpack_data(void *dst, const void *src, size_t sz) +{ + memcpy(dst, src, sz); + return src + sz; +} + +int crypto_ecdh_key_len(const struct ecdh *params) +{ + return ECDH_KPP_SECRET_MIN_SIZE + params->key_size; +} +EXPORT_SYMBOL_GPL(crypto_ecdh_key_len); + +int crypto_ecdh_encode_key(char *buf, unsigned int len, + const struct ecdh *params) +{ + u8 *ptr = buf; + struct kpp_secret secret = { + .type = CRYPTO_KPP_SECRET_TYPE_ECDH, + .len = len + }; + + if (unlikely(!buf)) + return -EINVAL; + + if (len != crypto_ecdh_key_len(params)) + return -EINVAL; + + ptr = ecdh_pack_data(ptr, &secret, sizeof(secret)); + ptr = ecdh_pack_data(ptr, &params->curve_id, sizeof(params->curve_id)); + ptr = ecdh_pack_data(ptr, &params->key_size, sizeof(params->key_size)); + ecdh_pack_data(ptr, params->key, params->key_size); + + return 0; +} +EXPORT_SYMBOL_GPL(crypto_ecdh_encode_key); + +int crypto_ecdh_decode_key(const char *buf, unsigned int len, + struct ecdh *params) +{ + const u8 *ptr = buf; + struct kpp_secret secret; + + if (unlikely(!buf || len < ECDH_KPP_SECRET_MIN_SIZE)) + return -EINVAL; + + ptr = ecdh_unpack_data(&secret, ptr, sizeof(secret)); + if (secret.type != CRYPTO_KPP_SECRET_TYPE_ECDH) + return -EINVAL; + + ptr = ecdh_unpack_data(&params->curve_id, ptr, sizeof(params->curve_id)); + ptr = ecdh_unpack_data(&params->key_size, ptr, sizeof(params->key_size)); + if (secret.len != crypto_ecdh_key_len(params)) + return -EINVAL; + + /* Don't allocate memory. Set pointer to data + * within the given buffer + */ + params->key = (void *)ptr; + + return 0; +} +EXPORT_SYMBOL_GPL(crypto_ecdh_decode_key); diff --git a/crypto/echainiv.c b/crypto/echainiv.c index b96a84560b67..1b01fe98e91f 100644 --- a/crypto/echainiv.c +++ b/crypto/echainiv.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -112,13 +113,16 @@ static int echainiv_encrypt(struct aead_request *req) info = req->iv; if (req->src != req->dst) { - struct blkcipher_desc desc = { - .tfm = ctx->null, - }; + SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull); - err = crypto_blkcipher_encrypt( - &desc, req->dst, req->src, - req->assoclen + req->cryptlen); + skcipher_request_set_tfm(nreq, ctx->sknull); + skcipher_request_set_callback(nreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(nreq, req->src, req->dst, + req->assoclen + req->cryptlen, + NULL); + + err = crypto_skcipher_encrypt(nreq); if (err) return err; } diff --git a/crypto/eseqiv.c b/crypto/eseqiv.c deleted file mode 100644 index 16dda72fc4f8..000000000000 --- a/crypto/eseqiv.c +++ /dev/null @@ -1,242 +0,0 @@ -/* - * eseqiv: Encrypted Sequence Number IV Generator - * - * This generator generates an IV based on a sequence number by xoring it - * with a salt and then encrypting it with the same key as used to encrypt - * the plain text. This algorithm requires that the block size be equal - * to the IV size. It is mainly useful for CBC. 
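In symbols, the generator deleted here computed IV = E_K(salt XOR seq), with E_K the same cipher and key used for the bulk data -- which is why the description above insists the block size equal the IV size: the sequence-number block must encrypt in a single cipher call.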
- * - * Copyright (c) 2007 Herbert Xu - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct eseqiv_request_ctx { - struct scatterlist src[2]; - struct scatterlist dst[2]; - char tail[]; -}; - -struct eseqiv_ctx { - spinlock_t lock; - unsigned int reqoff; - char salt[]; -}; - -static void eseqiv_complete2(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct eseqiv_request_ctx *reqctx = skcipher_givcrypt_reqctx(req); - - memcpy(req->giv, PTR_ALIGN((u8 *)reqctx->tail, - crypto_ablkcipher_alignmask(geniv) + 1), - crypto_ablkcipher_ivsize(geniv)); -} - -static void eseqiv_complete(struct crypto_async_request *base, int err) -{ - struct skcipher_givcrypt_request *req = base->data; - - if (err) - goto out; - - eseqiv_complete2(req); - -out: - skcipher_givcrypt_complete(req, err); -} - -static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct eseqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - struct eseqiv_request_ctx *reqctx = skcipher_givcrypt_reqctx(req); - struct ablkcipher_request *subreq; - crypto_completion_t compl; - void *data; - struct scatterlist *osrc, *odst; - struct scatterlist *dst; - struct page *srcp; - struct page *dstp; - u8 *giv; - u8 *vsrc; - u8 *vdst; - __be64 seq; - unsigned int ivsize; - unsigned int len; - int err; - - subreq = (void *)(reqctx->tail + ctx->reqoff); - ablkcipher_request_set_tfm(subreq, skcipher_geniv_cipher(geniv)); - - giv = req->giv; - compl = req->creq.base.complete; - data = req->creq.base.data; - - osrc = req->creq.src; - odst = req->creq.dst; - srcp = sg_page(osrc); - dstp = sg_page(odst); - vsrc = PageHighMem(srcp) ? NULL : page_address(srcp) + osrc->offset; - vdst = PageHighMem(dstp) ? 
NULL : page_address(dstp) + odst->offset; - - ivsize = crypto_ablkcipher_ivsize(geniv); - - if (vsrc != giv + ivsize && vdst != giv + ivsize) { - giv = PTR_ALIGN((u8 *)reqctx->tail, - crypto_ablkcipher_alignmask(geniv) + 1); - compl = eseqiv_complete; - data = req; - } - - ablkcipher_request_set_callback(subreq, req->creq.base.flags, compl, - data); - - sg_init_table(reqctx->src, 2); - sg_set_buf(reqctx->src, giv, ivsize); - scatterwalk_crypto_chain(reqctx->src, osrc, vsrc == giv + ivsize, 2); - - dst = reqctx->src; - if (osrc != odst) { - sg_init_table(reqctx->dst, 2); - sg_set_buf(reqctx->dst, giv, ivsize); - scatterwalk_crypto_chain(reqctx->dst, odst, vdst == giv + ivsize, 2); - - dst = reqctx->dst; - } - - ablkcipher_request_set_crypt(subreq, reqctx->src, dst, - req->creq.nbytes + ivsize, - req->creq.info); - - memcpy(req->creq.info, ctx->salt, ivsize); - - len = ivsize; - if (ivsize > sizeof(u64)) { - memset(req->giv, 0, ivsize - sizeof(u64)); - len = sizeof(u64); - } - seq = cpu_to_be64(req->seq); - memcpy(req->giv + ivsize - len, &seq, len); - - err = crypto_ablkcipher_encrypt(subreq); - if (err) - goto out; - - if (giv != req->giv) - eseqiv_complete2(req); - -out: - return err; -} - -static int eseqiv_init(struct crypto_tfm *tfm) -{ - struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm); - struct eseqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - unsigned long alignmask; - unsigned int reqsize; - int err; - - spin_lock_init(&ctx->lock); - - alignmask = crypto_tfm_ctx_alignment() - 1; - reqsize = sizeof(struct eseqiv_request_ctx); - - if (alignmask & reqsize) { - alignmask &= reqsize; - alignmask--; - } - - alignmask = ~alignmask; - alignmask &= crypto_ablkcipher_alignmask(geniv); - - reqsize += alignmask; - reqsize += crypto_ablkcipher_ivsize(geniv); - reqsize = ALIGN(reqsize, crypto_tfm_ctx_alignment()); - - ctx->reqoff = reqsize - sizeof(struct eseqiv_request_ctx); - - tfm->crt_ablkcipher.reqsize = reqsize + - sizeof(struct ablkcipher_request); - - err = 0; - if (!crypto_get_default_rng()) { - crypto_ablkcipher_crt(geniv)->givencrypt = eseqiv_givencrypt; - err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt, - crypto_ablkcipher_ivsize(geniv)); - crypto_put_default_rng(); - } - - return err ?: skcipher_geniv_init(tfm); -} - -static struct crypto_template eseqiv_tmpl; - -static struct crypto_instance *eseqiv_alloc(struct rtattr **tb) -{ - struct crypto_instance *inst; - int err; - - inst = skcipher_geniv_alloc(&eseqiv_tmpl, tb, 0, 0); - if (IS_ERR(inst)) - goto out; - - err = -EINVAL; - if (inst->alg.cra_ablkcipher.ivsize != inst->alg.cra_blocksize) - goto free_inst; - - inst->alg.cra_init = eseqiv_init; - inst->alg.cra_exit = skcipher_geniv_exit; - - inst->alg.cra_ctxsize = sizeof(struct eseqiv_ctx); - inst->alg.cra_ctxsize += inst->alg.cra_ablkcipher.ivsize; - -out: - return inst; - -free_inst: - skcipher_geniv_free(inst); - inst = ERR_PTR(err); - goto out; -} - -static struct crypto_template eseqiv_tmpl = { - .name = "eseqiv", - .alloc = eseqiv_alloc, - .free = skcipher_geniv_free, - .module = THIS_MODULE, -}; - -static int __init eseqiv_module_init(void) -{ - return crypto_register_template(&eseqiv_tmpl); -} - -static void __exit eseqiv_module_exit(void) -{ - crypto_unregister_template(&eseqiv_tmpl); -} - -module_init(eseqiv_module_init); -module_exit(eseqiv_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Encrypted Sequence Number IV Generator"); -MODULE_ALIAS_CRYPTO("eseqiv"); diff --git a/crypto/gcm.c b/crypto/gcm.c index bec329b3de8d..70a892e87ccb 
100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -29,7 +29,7 @@ struct gcm_instance_ctx { }; struct crypto_gcm_ctx { - struct crypto_ablkcipher *ctr; + struct crypto_skcipher *ctr; struct crypto_ahash *ghash; }; @@ -50,7 +50,7 @@ struct crypto_rfc4543_instance_ctx { struct crypto_rfc4543_ctx { struct crypto_aead *child; - struct crypto_blkcipher *null; + struct crypto_skcipher *null; u8 nonce[4]; }; @@ -74,7 +74,7 @@ struct crypto_gcm_req_priv_ctx { struct crypto_gcm_ghash_ctx ghash_ctx; union { struct ahash_request ahreq; - struct ablkcipher_request abreq; + struct skcipher_request skreq; } u; }; @@ -114,7 +114,7 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key, { struct crypto_gcm_ctx *ctx = crypto_aead_ctx(aead); struct crypto_ahash *ghash = ctx->ghash; - struct crypto_ablkcipher *ctr = ctx->ctr; + struct crypto_skcipher *ctr = ctx->ctr; struct { be128 hash; u8 iv[8]; @@ -122,35 +122,35 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key, struct crypto_gcm_setkey_result result; struct scatterlist sg[1]; - struct ablkcipher_request req; + struct skcipher_request req; } *data; int err; - crypto_ablkcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(ctr, crypto_aead_get_flags(aead) & - CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(ctr, key, keylen); - crypto_aead_set_flags(aead, crypto_ablkcipher_get_flags(ctr) & + crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(ctr, key, keylen); + crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctr) & CRYPTO_TFM_RES_MASK); if (err) return err; - data = kzalloc(sizeof(*data) + crypto_ablkcipher_reqsize(ctr), + data = kzalloc(sizeof(*data) + crypto_skcipher_reqsize(ctr), GFP_KERNEL); if (!data) return -ENOMEM; init_completion(&data->result.completion); sg_init_one(data->sg, &data->hash, sizeof(data->hash)); - ablkcipher_request_set_tfm(&data->req, ctr); - ablkcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP | - CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_gcm_setkey_done, - &data->result); - ablkcipher_request_set_crypt(&data->req, data->sg, data->sg, - sizeof(data->hash), data->iv); - - err = crypto_ablkcipher_encrypt(&data->req); + skcipher_request_set_tfm(&data->req, ctr); + skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_gcm_setkey_done, + &data->result); + skcipher_request_set_crypt(&data->req, data->sg, data->sg, + sizeof(data->hash), data->iv); + + err = crypto_skcipher_encrypt(&data->req); if (err == -EINPROGRESS || err == -EBUSY) { err = wait_for_completion_interruptible( &data->result.completion); @@ -223,13 +223,13 @@ static void crypto_gcm_init_crypt(struct aead_request *req, struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_gcm_ctx *ctx = crypto_aead_ctx(aead); struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); - struct ablkcipher_request *ablk_req = &pctx->u.abreq; + struct skcipher_request *skreq = &pctx->u.skreq; struct scatterlist *dst; dst = req->src == req->dst ? 
pctx->src : pctx->dst; - ablkcipher_request_set_tfm(ablk_req, ctx->ctr); - ablkcipher_request_set_crypt(ablk_req, pctx->src, dst, + skcipher_request_set_tfm(skreq, ctx->ctr); + skcipher_request_set_crypt(skreq, pctx->src, dst, cryptlen + sizeof(pctx->auth_tag), pctx->iv); } @@ -494,14 +494,14 @@ out: static int crypto_gcm_encrypt(struct aead_request *req) { struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); - struct ablkcipher_request *abreq = &pctx->u.abreq; + struct skcipher_request *skreq = &pctx->u.skreq; u32 flags = aead_request_flags(req); crypto_gcm_init_common(req); crypto_gcm_init_crypt(req, req->cryptlen); - ablkcipher_request_set_callback(abreq, flags, gcm_encrypt_done, req); + skcipher_request_set_callback(skreq, flags, gcm_encrypt_done, req); - return crypto_ablkcipher_encrypt(abreq) ?: + return crypto_skcipher_encrypt(skreq) ?: gcm_encrypt_continue(req, flags); } @@ -533,12 +533,12 @@ static void gcm_decrypt_done(struct crypto_async_request *areq, int err) static int gcm_dec_hash_continue(struct aead_request *req, u32 flags) { struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); - struct ablkcipher_request *abreq = &pctx->u.abreq; + struct skcipher_request *skreq = &pctx->u.skreq; struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; crypto_gcm_init_crypt(req, gctx->cryptlen); - ablkcipher_request_set_callback(abreq, flags, gcm_decrypt_done, req); - return crypto_ablkcipher_decrypt(abreq) ?: crypto_gcm_verify(req); + skcipher_request_set_callback(skreq, flags, gcm_decrypt_done, req); + return crypto_skcipher_decrypt(skreq) ?: crypto_gcm_verify(req); } static int crypto_gcm_decrypt(struct aead_request *req) @@ -566,7 +566,7 @@ static int crypto_gcm_init_tfm(struct crypto_aead *tfm) struct aead_instance *inst = aead_alg_instance(tfm); struct gcm_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_gcm_ctx *ctx = crypto_aead_ctx(tfm); - struct crypto_ablkcipher *ctr; + struct crypto_skcipher *ctr; struct crypto_ahash *ghash; unsigned long align; int err; @@ -575,7 +575,7 @@ static int crypto_gcm_init_tfm(struct crypto_aead *tfm) if (IS_ERR(ghash)) return PTR_ERR(ghash); - ctr = crypto_spawn_skcipher(&ictx->ctr); + ctr = crypto_spawn_skcipher2(&ictx->ctr); err = PTR_ERR(ctr); if (IS_ERR(ctr)) goto err_free_hash; @@ -587,8 +587,8 @@ static int crypto_gcm_init_tfm(struct crypto_aead *tfm) align &= ~(crypto_tfm_ctx_alignment() - 1); crypto_aead_set_reqsize(tfm, align + offsetof(struct crypto_gcm_req_priv_ctx, u) + - max(sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(ctr), + max(sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(ctr), sizeof(struct ahash_request) + crypto_ahash_reqsize(ghash))); @@ -604,7 +604,7 @@ static void crypto_gcm_exit_tfm(struct crypto_aead *tfm) struct crypto_gcm_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_ahash(ctx->ghash); - crypto_free_ablkcipher(ctx->ctr); + crypto_free_skcipher(ctx->ctr); } static void crypto_gcm_free(struct aead_instance *inst) @@ -624,7 +624,7 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, { struct crypto_attr_type *algt; struct aead_instance *inst; - struct crypto_alg *ctr; + struct skcipher_alg *ctr; struct crypto_alg *ghash_alg; struct hash_alg_common *ghash; struct gcm_instance_ctx *ctx; @@ -639,7 +639,9 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type, CRYPTO_ALG_TYPE_HASH, - CRYPTO_ALG_TYPE_AHASH_MASK); + CRYPTO_ALG_TYPE_AHASH_MASK | + crypto_requires_sync(algt->type, + 
algt->mask)); if (IS_ERR(ghash_alg)) return PTR_ERR(ghash_alg); @@ -661,41 +663,42 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, goto err_drop_ghash; crypto_set_skcipher_spawn(&ctx->ctr, aead_crypto_instance(inst)); - err = crypto_grab_skcipher(&ctx->ctr, ctr_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher2(&ctx->ctr, ctr_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_ghash; - ctr = crypto_skcipher_spawn_alg(&ctx->ctr); + ctr = crypto_spawn_skcipher_alg(&ctx->ctr); /* We only support 16-byte blocks. */ - if (ctr->cra_ablkcipher.ivsize != 16) + if (crypto_skcipher_alg_ivsize(ctr) != 16) goto out_put_ctr; /* Not a stream cipher? */ err = -EINVAL; - if (ctr->cra_blocksize != 1) + if (ctr->base.cra_blocksize != 1) goto out_put_ctr; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "gcm_base(%s,%s)", ctr->cra_driver_name, + "gcm_base(%s,%s)", ctr->base.cra_driver_name, ghash_alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto out_put_ctr; memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME); - inst->alg.base.cra_flags = (ghash->base.cra_flags | ctr->cra_flags) & - CRYPTO_ALG_ASYNC; + inst->alg.base.cra_flags = (ghash->base.cra_flags | + ctr->base.cra_flags) & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = (ghash->base.cra_priority + - ctr->cra_priority) / 2; + ctr->base.cra_priority) / 2; inst->alg.base.cra_blocksize = 1; inst->alg.base.cra_alignmask = ghash->base.cra_alignmask | - ctr->cra_alignmask; + ctr->base.cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct crypto_gcm_ctx); inst->alg.ivsize = 12; + inst->alg.chunksize = crypto_skcipher_alg_chunksize(ctr); inst->alg.maxauthsize = 16; inst->alg.init = crypto_gcm_init_tfm; inst->alg.exit = crypto_gcm_exit_tfm; @@ -980,6 +983,7 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl, inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc4106_ctx); inst->alg.ivsize = 8; + inst->alg.chunksize = crypto_aead_alg_chunksize(alg); inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(alg); inst->alg.init = crypto_rfc4106_init_tfm; @@ -1084,11 +1088,13 @@ static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc) unsigned int authsize = crypto_aead_authsize(aead); unsigned int nbytes = req->assoclen + req->cryptlen - (enc ? 
0 : authsize); - struct blkcipher_desc desc = { - .tfm = ctx->null, - }; + SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null); - return crypto_blkcipher_encrypt(&desc, req->dst, req->src, nbytes); + skcipher_request_set_tfm(nreq, ctx->null); + skcipher_request_set_callback(nreq, req->base.flags, NULL, NULL); + skcipher_request_set_crypt(nreq, req->src, req->dst, nbytes, NULL); + + return crypto_skcipher_encrypt(nreq); } static int crypto_rfc4543_encrypt(struct aead_request *req) @@ -1108,7 +1114,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm) struct crypto_aead_spawn *spawn = &ictx->aead; struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_aead *aead; - struct crypto_blkcipher *null; + struct crypto_skcipher *null; unsigned long align; int err = 0; @@ -1116,7 +1122,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm) if (IS_ERR(aead)) return PTR_ERR(aead); - null = crypto_get_default_null_skcipher(); + null = crypto_get_default_null_skcipher2(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_aead; @@ -1144,7 +1150,7 @@ static void crypto_rfc4543_exit_tfm(struct crypto_aead *tfm) struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_aead(ctx->child); - crypto_put_default_null_skcipher(); + crypto_put_default_null_skcipher2(); } static void crypto_rfc4543_free(struct aead_instance *inst) @@ -1219,6 +1225,7 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl, inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc4543_ctx); inst->alg.ivsize = 8; + inst->alg.chunksize = crypto_aead_alg_chunksize(alg); inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(alg); inst->alg.init = crypto_rfc4543_init_tfm; diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c index 597cedd3531c..c4938497eedb 100644 --- a/crypto/jitterentropy-kcapi.c +++ b/crypto/jitterentropy-kcapi.c @@ -87,24 +87,28 @@ void jent_memcpy(void *dest, const void *src, unsigned int n) memcpy(dest, src, n); } +/* + * Obtain a high-resolution time stamp value. The time stamp is used to measure + * the execution time of a given code path and its variations. Hence, the time + * stamp must have a sufficiently high resolution. + * + * Note, if the function returns zero because a given architecture does not + * implement a high-resolution time stamp, the RNG code's runtime test + * will detect it and will not produce output. + */ void jent_get_nstime(__u64 *out) { - struct timespec ts; __u64 tmp = 0; tmp = random_get_entropy(); /* - * If random_get_entropy does not return a value (which is possible on, - * for example, MIPS), invoke __getnstimeofday + * If random_get_entropy does not return a value, i.e. it is not + * implemented for a given architecture, use a clock source. * hoping that there are timers we can work with. */ - if ((0 == tmp) && - (0 == __getnstimeofday(&ts))) { - tmp = ts.tv_sec; - tmp = tmp << 32; - tmp = tmp | ts.tv_nsec; - } + if (tmp == 0) + tmp = ktime_get_ns(); *out = tmp; } diff --git a/crypto/kpp.c b/crypto/kpp.c new file mode 100644 index 000000000000..d36ce05eee43 --- /dev/null +++ b/crypto/kpp.c @@ -0,0 +1,123 @@ +/* + * Key-agreement Protocol Primitives (KPP) + * + * Copyright (c) 2016, Intel Corporation + * Authors: Salvatore Benedetto + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
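The KPP registration plumbing that follows is what lets the 'ecdh' algorithm registered earlier in this patch be allocated and driven from kernel code. A hedged consumer-side sketch (error handling elided; the kpp_request and crypto_kpp_* helpers live in the new include/crypto/kpp.h, which this diff excerpt does not show; my_priv and the scatterlists are illustrative):

	struct crypto_kpp *tfm = crypto_alloc_kpp("ecdh", 0, 0);
	struct kpp_request *req = kpp_request_alloc(tfm, GFP_KERNEL);
	struct ecdh p = {
		.curve_id = ECC_CURVE_NIST_P192,
		.key	  = my_priv,		/* 24 bytes for P-192 */
		.key_size = 24,
	};
	char buf[64];				/* >= crypto_ecdh_key_len(&p) */

	crypto_ecdh_encode_key(buf, crypto_ecdh_key_len(&p), &p);
	crypto_kpp_set_secret(tfm, buf, crypto_ecdh_key_len(&p));

	/* no src scatterlist: ecdh_compute_value() emits our public key */
	kpp_request_set_input(req, NULL, 0);
	kpp_request_set_output(req, pub_sg, 48);
	crypto_kpp_generate_public_key(req);

	/* with the peer's public key as src, the same path derives the secret */
	kpp_request_set_input(req, peer_pub_sg, 48);
	kpp_request_set_output(req, secret_sg, 24);
	crypto_kpp_compute_shared_secret(req);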
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +#ifdef CONFIG_NET +static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_kpp rkpp; + + strncpy(rkpp.type, "kpp", sizeof(rkpp.type)); + + if (nla_put(skb, CRYPTOCFGA_REPORT_KPP, + sizeof(struct crypto_report_kpp), &rkpp)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +#else +static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + return -ENOSYS; +} +#endif + +static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg) + __attribute__ ((unused)); + +static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg) +{ + seq_puts(m, "type : kpp\n"); +} + +static void crypto_kpp_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_kpp *kpp = __crypto_kpp_tfm(tfm); + struct kpp_alg *alg = crypto_kpp_alg(kpp); + + alg->exit(kpp); +} + +static int crypto_kpp_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_kpp *kpp = __crypto_kpp_tfm(tfm); + struct kpp_alg *alg = crypto_kpp_alg(kpp); + + if (alg->exit) + kpp->base.exit = crypto_kpp_exit_tfm; + + if (alg->init) + return alg->init(kpp); + + return 0; +} + +static const struct crypto_type crypto_kpp_type = { + .extsize = crypto_alg_extsize, + .init_tfm = crypto_kpp_init_tfm, +#ifdef CONFIG_PROC_FS + .show = crypto_kpp_show, +#endif + .report = crypto_kpp_report, + .maskclear = ~CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_MASK, + .type = CRYPTO_ALG_TYPE_KPP, + .tfmsize = offsetof(struct crypto_kpp, base), +}; + +struct crypto_kpp *crypto_alloc_kpp(const char *alg_name, u32 type, u32 mask) +{ + return crypto_alloc_tfm(alg_name, &crypto_kpp_type, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_alloc_kpp); + +static void kpp_prepare_alg(struct kpp_alg *alg) +{ + struct crypto_alg *base = &alg->base; + + base->cra_type = &crypto_kpp_type; + base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; + base->cra_flags |= CRYPTO_ALG_TYPE_KPP; +} + +int crypto_register_kpp(struct kpp_alg *alg) +{ + struct crypto_alg *base = &alg->base; + + kpp_prepare_alg(alg); + return crypto_register_alg(base); +} +EXPORT_SYMBOL_GPL(crypto_register_kpp); + +void crypto_unregister_kpp(struct kpp_alg *alg) +{ + crypto_unregister_alg(&alg->base); +} +EXPORT_SYMBOL_GPL(crypto_unregister_kpp); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Key-agreement Protocol Primitives"); diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index c4eb9da49d4f..86fb59b109a9 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -41,7 +41,7 @@ struct mcryptd_flush_list { static struct mcryptd_flush_list __percpu *mcryptd_flist; struct hashd_instance_ctx { - struct crypto_shash_spawn spawn; + struct crypto_ahash_spawn spawn; struct mcryptd_queue *queue; }; @@ -272,18 +272,18 @@ static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm) { struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst); - struct crypto_shash_spawn *spawn = &ictx->spawn; + struct crypto_ahash_spawn *spawn = &ictx->spawn; struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_shash *hash; + struct crypto_ahash *hash; - hash = crypto_spawn_shash(spawn); + hash = crypto_spawn_ahash(spawn); if (IS_ERR(hash)) return PTR_ERR(hash); ctx->child = hash; crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct mcryptd_hash_request_ctx) + - crypto_shash_descsize(hash)); + 
crypto_ahash_reqsize(hash)); return 0; } @@ -291,21 +291,21 @@ static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm) { struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_shash(ctx->child); + crypto_free_ahash(ctx->child); } static int mcryptd_hash_setkey(struct crypto_ahash *parent, const u8 *key, unsigned int keylen) { struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(parent); - struct crypto_shash *child = ctx->child; + struct crypto_ahash *child = ctx->child; int err; - crypto_shash_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_shash_set_flags(child, crypto_ahash_get_flags(parent) & + crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(child, crypto_ahash_get_flags(parent) & CRYPTO_TFM_REQ_MASK); - err = crypto_shash_setkey(child, key, keylen); - crypto_ahash_set_flags(parent, crypto_shash_get_flags(child) & + err = crypto_ahash_setkey(child, key, keylen); + crypto_ahash_set_flags(parent, crypto_ahash_get_flags(child) & CRYPTO_TFM_RES_MASK); return err; } @@ -331,20 +331,20 @@ static int mcryptd_hash_enqueue(struct ahash_request *req, static void mcryptd_hash_init(struct crypto_async_request *req_async, int err) { struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); - struct crypto_shash *child = ctx->child; + struct crypto_ahash *child = ctx->child; struct ahash_request *req = ahash_request_cast(req_async); struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - struct shash_desc *desc = &rctx->desc; + struct ahash_request *desc = &rctx->areq; if (unlikely(err == -EINPROGRESS)) goto out; - desc->tfm = child; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + ahash_request_set_tfm(desc, child); + ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP, + rctx->complete, req_async); - err = crypto_shash_init(desc); - - req->base.complete = rctx->complete; + rctx->out = req->result; + err = crypto_ahash_init(desc); out: local_bh_disable(); @@ -365,7 +365,8 @@ static void mcryptd_hash_update(struct crypto_async_request *req_async, int err) if (unlikely(err == -EINPROGRESS)) goto out; - err = shash_ahash_mcryptd_update(req, &rctx->desc); + rctx->out = req->result; + err = ahash_mcryptd_update(&rctx->areq); if (err) { req->base.complete = rctx->complete; goto out; @@ -391,7 +392,8 @@ static void mcryptd_hash_final(struct crypto_async_request *req_async, int err) if (unlikely(err == -EINPROGRESS)) goto out; - err = shash_ahash_mcryptd_final(req, &rctx->desc); + rctx->out = req->result; + err = ahash_mcryptd_final(&rctx->areq); if (err) { req->base.complete = rctx->complete; goto out; @@ -416,8 +418,8 @@ static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err) if (unlikely(err == -EINPROGRESS)) goto out; - - err = shash_ahash_mcryptd_finup(req, &rctx->desc); + rctx->out = req->result; + err = ahash_mcryptd_finup(&rctx->areq); if (err) { req->base.complete = rctx->complete; @@ -439,25 +441,21 @@ static int mcryptd_hash_finup_enqueue(struct ahash_request *req) static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err) { struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); - struct crypto_shash *child = ctx->child; + struct crypto_ahash *child = ctx->child; struct ahash_request *req = ahash_request_cast(req_async); struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - struct shash_desc *desc = &rctx->desc; + struct ahash_request *desc = &rctx->areq; if (unlikely(err == -EINPROGRESS)) goto out; - desc->tfm = child; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; /* check this 
again */ - - err = shash_ahash_mcryptd_digest(req, desc); + ahash_request_set_tfm(desc, child); + ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP, + rctx->complete, req_async); - if (err) { - req->base.complete = rctx->complete; - goto out; - } + rctx->out = req->result; + err = ahash_mcryptd_digest(desc); - return; out: local_bh_disable(); rctx->complete(&req->base, err); @@ -473,14 +471,14 @@ static int mcryptd_hash_export(struct ahash_request *req, void *out) { struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - return crypto_shash_export(&rctx->desc, out); + return crypto_ahash_export(&rctx->areq, out); } static int mcryptd_hash_import(struct ahash_request *req, const void *in) { struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - return crypto_shash_import(&rctx->desc, in); + return crypto_ahash_import(&rctx->areq, in); } static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, @@ -488,7 +486,7 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, { struct hashd_instance_ctx *ctx; struct ahash_instance *inst; - struct shash_alg *salg; + struct hash_alg_common *halg; struct crypto_alg *alg; u32 type = 0; u32 mask = 0; @@ -496,11 +494,11 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, mcryptd_check_internal(tb, &type, &mask); - salg = shash_attr_alg(tb[1], type, mask); - if (IS_ERR(salg)) - return PTR_ERR(salg); + halg = ahash_attr_alg(tb[1], type, mask); + if (IS_ERR(halg)) + return PTR_ERR(halg); - alg = &salg->base; + alg = &halg->base; pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name); inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(), sizeof(*ctx)); @@ -511,7 +509,7 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, ctx = ahash_instance_ctx(inst); ctx->queue = queue; - err = crypto_init_shash_spawn(&ctx->spawn, salg, + err = crypto_init_ahash_spawn(&ctx->spawn, halg, ahash_crypto_instance(inst)); if (err) goto out_free_inst; @@ -521,8 +519,8 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, type |= CRYPTO_ALG_INTERNAL; inst->alg.halg.base.cra_flags = type; - inst->alg.halg.digestsize = salg->digestsize; - inst->alg.halg.statesize = salg->statesize; + inst->alg.halg.digestsize = halg->digestsize; + inst->alg.halg.statesize = halg->statesize; inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx); inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm; @@ -539,7 +537,7 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, err = ahash_register_instance(tmpl, inst); if (err) { - crypto_drop_shash(&ctx->spawn); + crypto_drop_ahash(&ctx->spawn); out_free_inst: kfree(inst); } @@ -575,7 +573,7 @@ static void mcryptd_free(struct crypto_instance *inst) switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_AHASH: - crypto_drop_shash(&hctx->spawn); + crypto_drop_ahash(&hctx->spawn); kfree(ahash_instance(inst)); return; default: @@ -612,55 +610,38 @@ struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, } EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash); -int shash_ahash_mcryptd_digest(struct ahash_request *req, - struct shash_desc *desc) +int ahash_mcryptd_digest(struct ahash_request *desc) { int err; - err = crypto_shash_init(desc) ?: - shash_ahash_mcryptd_finup(req, desc); + err = crypto_ahash_init(desc) ?: + ahash_mcryptd_finup(desc); return err; } -EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_digest); -int 
shash_ahash_mcryptd_update(struct ahash_request *req, - struct shash_desc *desc) +int ahash_mcryptd_update(struct ahash_request *desc) { - struct crypto_shash *tfm = desc->tfm; - struct shash_alg *shash = crypto_shash_alg(tfm); - /* alignment is to be done by multi-buffer crypto algorithm if needed */ - return shash->update(desc, NULL, 0); + return crypto_ahash_update(desc); } -EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_update); -int shash_ahash_mcryptd_finup(struct ahash_request *req, - struct shash_desc *desc) +int ahash_mcryptd_finup(struct ahash_request *desc) { - struct crypto_shash *tfm = desc->tfm; - struct shash_alg *shash = crypto_shash_alg(tfm); - /* alignment is to be done by multi-buffer crypto algorithm if needed */ - return shash->finup(desc, NULL, 0, req->result); + return crypto_ahash_finup(desc); } -EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_finup); -int shash_ahash_mcryptd_final(struct ahash_request *req, - struct shash_desc *desc) +int ahash_mcryptd_final(struct ahash_request *desc) { - struct crypto_shash *tfm = desc->tfm; - struct shash_alg *shash = crypto_shash_alg(tfm); - /* alignment is to be done by multi-buffer crypto algorithm if needed */ - return shash->final(desc, req->result); + return crypto_ahash_final(desc); } -EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_final); -struct crypto_shash *mcryptd_ahash_child(struct mcryptd_ahash *tfm) +struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm) { struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); @@ -668,12 +649,12 @@ struct crypto_shash *mcryptd_ahash_child(struct mcryptd_ahash *tfm) } EXPORT_SYMBOL_GPL(mcryptd_ahash_child); -struct shash_desc *mcryptd_shash_desc(struct ahash_request *req) +struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req) { struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - return &rctx->desc; + return &rctx->areq; } -EXPORT_SYMBOL_GPL(mcryptd_shash_desc); +EXPORT_SYMBOL_GPL(mcryptd_ahash_desc); void mcryptd_free_ahash(struct mcryptd_ahash *tfm) { @@ -681,7 +662,6 @@ void mcryptd_free_ahash(struct mcryptd_ahash *tfm) } EXPORT_SYMBOL_GPL(mcryptd_free_ahash); - static int __init mcryptd_init(void) { int err, cpu; diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index ead8dc0d084e..877019a6d3ea 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -92,60 +92,66 @@ static const struct rsa_asn1_template *rsa_lookup_asn1(const char *name) struct pkcs1pad_ctx { struct crypto_akcipher *child; - const char *hash_name; unsigned int key_size; }; struct pkcs1pad_inst_ctx { struct crypto_akcipher_spawn spawn; - const char *hash_name; + const struct rsa_asn1_template *digest_info; }; struct pkcs1pad_request { - struct akcipher_request child_req; - - struct scatterlist in_sg[3], out_sg[2]; + struct scatterlist in_sg[2], out_sg[1]; uint8_t *in_buf, *out_buf; + struct akcipher_request child_req; }; static int pkcs1pad_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); - int err, size; + int err; + + ctx->key_size = 0; err = crypto_akcipher_set_pub_key(ctx->child, key, keylen); + if (err) + return err; - if (!err) { - /* Find out new modulus size from rsa implementation */ - size = crypto_akcipher_maxsize(ctx->child); + /* Find out new modulus size from rsa implementation */ + err = crypto_akcipher_maxsize(ctx->child); + if (err < 0) + return err; - ctx->key_size = size > 0 ? 
size : 0; - if (size <= 0) - err = size; - } + if (err > PAGE_SIZE) + return -ENOTSUPP; - return err; + ctx->key_size = err; + return 0; } static int pkcs1pad_set_priv_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); - int err, size; + int err; + + ctx->key_size = 0; err = crypto_akcipher_set_priv_key(ctx->child, key, keylen); + if (err) + return err; - if (!err) { - /* Find out new modulus size from rsa implementation */ - size = crypto_akcipher_maxsize(ctx->child); + /* Find out new modulus size from rsa implementation */ + err = crypto_akcipher_maxsize(ctx->child); + if (err < 0) + return err; - ctx->key_size = size > 0 ? size : 0; - if (size <= 0) - err = size; - } + if (err > PAGE_SIZE) + return -ENOTSUPP; - return err; + ctx->key_size = err; + return 0; } static int pkcs1pad_get_max_size(struct crypto_akcipher *tfm) @@ -164,19 +170,10 @@ static int pkcs1pad_get_max_size(struct crypto_akcipher *tfm) static void pkcs1pad_sg_set_buf(struct scatterlist *sg, void *buf, size_t len, struct scatterlist *next) { - int nsegs = next ? 1 : 0; - - if (offset_in_page(buf) + len <= PAGE_SIZE) { - nsegs += 1; - sg_init_table(sg, nsegs); - sg_set_buf(sg, buf, len); - } else { - nsegs += 2; - sg_init_table(sg, nsegs); - sg_set_buf(sg + 0, buf, PAGE_SIZE - offset_in_page(buf)); - sg_set_buf(sg + 1, buf + PAGE_SIZE - offset_in_page(buf), - offset_in_page(buf) + len - PAGE_SIZE); - } + int nsegs = next ? 2 : 1; + + sg_init_table(sg, nsegs); + sg_set_buf(sg, buf, len); if (next) sg_chain(sg, nsegs, next); @@ -187,37 +184,36 @@ static int pkcs1pad_encrypt_sign_complete(struct akcipher_request *req, int err) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req); - size_t pad_len = ctx->key_size - req_ctx->child_req.dst_len; - size_t chunk_len, pad_left; - struct sg_mapping_iter miter; - - if (!err) { - if (pad_len) { - sg_miter_start(&miter, req->dst, - sg_nents_for_len(req->dst, pad_len), - SG_MITER_ATOMIC | SG_MITER_TO_SG); - - pad_left = pad_len; - while (pad_left) { - sg_miter_next(&miter); - - chunk_len = min(miter.length, pad_left); - memset(miter.addr, 0, chunk_len); - pad_left -= chunk_len; - } - - sg_miter_stop(&miter); - } - - sg_pcopy_from_buffer(req->dst, - sg_nents_for_len(req->dst, ctx->key_size), - req_ctx->out_buf, req_ctx->child_req.dst_len, - pad_len); - } + unsigned int pad_len; + unsigned int len; + u8 *out_buf; + + if (err) + goto out; + + len = req_ctx->child_req.dst_len; + pad_len = ctx->key_size - len; + + /* Four billion to one */ + if (likely(!pad_len)) + goto out; + + out_buf = kzalloc(ctx->key_size, GFP_ATOMIC); + err = -ENOMEM; + if (!out_buf) + goto out; + + sg_copy_to_buffer(req->dst, sg_nents_for_len(req->dst, len), + out_buf + pad_len, len); + sg_copy_from_buffer(req->dst, + sg_nents_for_len(req->dst, ctx->key_size), + out_buf, ctx->key_size); + kzfree(out_buf); + +out: req->dst_len = ctx->key_size; kfree(req_ctx->in_buf); - kzfree(req_ctx->out_buf); return err; } @@ -257,21 +253,8 @@ static int pkcs1pad_encrypt(struct akcipher_request *req) return -EOVERFLOW; } - if (ctx->key_size > PAGE_SIZE) - return -ENOTSUPP; - - /* - * Replace both input and output to add the padding in the input and - * the potential missing leading zeros in the output. 
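For reference (RFC 3447 EME-PKCS1-v1_5), the input buffer assembled by the encrypt path is 0x00 || 0x02 || PS || 0x00 || M, where PS is at least eight nonzero random octets. The leading 0x00 never hits memory: the child request is handed key_size - 1 bytes, and the "missing leading zeros" of the output are restored in the completion handler above.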
- */ - req_ctx->child_req.src = req_ctx->in_sg; - req_ctx->child_req.src_len = ctx->key_size - 1; - req_ctx->child_req.dst = req_ctx->out_sg; - req_ctx->child_req.dst_len = ctx->key_size; - req_ctx->in_buf = kmalloc(ctx->key_size - 1 - req->src_len, - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); + GFP_KERNEL); if (!req_ctx->in_buf) return -ENOMEM; @@ -284,9 +267,7 @@ static int pkcs1pad_encrypt(struct akcipher_request *req) pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf, ctx->key_size - 1 - req->src_len, req->src); - req_ctx->out_buf = kmalloc(ctx->key_size, - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); + req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL); if (!req_ctx->out_buf) { kfree(req_ctx->in_buf); return -ENOMEM; @@ -299,6 +280,10 @@ static int pkcs1pad_encrypt(struct akcipher_request *req) akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, pkcs1pad_encrypt_sign_complete_cb, req); + /* Reuse output buffer */ + akcipher_request_set_crypt(&req_ctx->child_req, req_ctx->in_sg, + req->dst, ctx->key_size - 1, req->dst_len); + err = crypto_akcipher_encrypt(&req_ctx->child_req); if (err != -EINPROGRESS && (err != -EBUSY || @@ -380,18 +365,7 @@ static int pkcs1pad_decrypt(struct akcipher_request *req) if (!ctx->key_size || req->src_len != ctx->key_size) return -EINVAL; - if (ctx->key_size > PAGE_SIZE) - return -ENOTSUPP; - - /* Reuse input buffer, output to a new buffer */ - req_ctx->child_req.src = req->src; - req_ctx->child_req.src_len = req->src_len; - req_ctx->child_req.dst = req_ctx->out_sg; - req_ctx->child_req.dst_len = ctx->key_size ; - - req_ctx->out_buf = kmalloc(ctx->key_size, - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); + req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL); if (!req_ctx->out_buf) return -ENOMEM; @@ -402,6 +376,11 @@ static int pkcs1pad_decrypt(struct akcipher_request *req) akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, pkcs1pad_decrypt_complete_cb, req); + /* Reuse input buffer, output to a new buffer */ + akcipher_request_set_crypt(&req_ctx->child_req, req->src, + req_ctx->out_sg, req->src_len, + ctx->key_size); + err = crypto_akcipher_decrypt(&req_ctx->child_req); if (err != -EINPROGRESS && (err != -EBUSY || @@ -416,20 +395,16 @@ static int pkcs1pad_sign(struct akcipher_request *req) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req); - const struct rsa_asn1_template *digest_info = NULL; + struct akcipher_instance *inst = akcipher_alg_instance(tfm); + struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst); + const struct rsa_asn1_template *digest_info = ictx->digest_info; int err; unsigned int ps_end, digest_size = 0; if (!ctx->key_size) return -EINVAL; - if (ctx->hash_name) { - digest_info = rsa_lookup_asn1(ctx->hash_name); - if (!digest_info) - return -EINVAL; - - digest_size = digest_info->size; - } + digest_size = digest_info->size; if (req->src_len + digest_size > ctx->key_size - 11) return -EOVERFLOW; @@ -439,21 +414,8 @@ static int pkcs1pad_sign(struct akcipher_request *req) return -EOVERFLOW; } - if (ctx->key_size > PAGE_SIZE) - return -ENOTSUPP; - - /* - * Replace both input and output to add the padding in the input and - * the potential missing leading zeros in the output. 
- */ - req_ctx->child_req.src = req_ctx->in_sg; - req_ctx->child_req.src_len = ctx->key_size - 1; - req_ctx->child_req.dst = req_ctx->out_sg; - req_ctx->child_req.dst_len = ctx->key_size; - req_ctx->in_buf = kmalloc(ctx->key_size - 1 - req->src_len, - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); + GFP_KERNEL); if (!req_ctx->in_buf) return -ENOMEM; @@ -462,29 +424,20 @@ static int pkcs1pad_sign(struct akcipher_request *req) memset(req_ctx->in_buf + 1, 0xff, ps_end - 1); req_ctx->in_buf[ps_end] = 0x00; - if (digest_info) { - memcpy(req_ctx->in_buf + ps_end + 1, digest_info->data, - digest_info->size); - } + memcpy(req_ctx->in_buf + ps_end + 1, digest_info->data, + digest_info->size); pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf, ctx->key_size - 1 - req->src_len, req->src); - req_ctx->out_buf = kmalloc(ctx->key_size, - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); - if (!req_ctx->out_buf) { - kfree(req_ctx->in_buf); - return -ENOMEM; - } - - pkcs1pad_sg_set_buf(req_ctx->out_sg, req_ctx->out_buf, - ctx->key_size, NULL); - akcipher_request_set_tfm(&req_ctx->child_req, ctx->child); akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, pkcs1pad_encrypt_sign_complete_cb, req); + /* Reuse output buffer */ + akcipher_request_set_crypt(&req_ctx->child_req, req_ctx->in_sg, + req->dst, ctx->key_size - 1, req->dst_len); + err = crypto_akcipher_sign(&req_ctx->child_req); if (err != -EINPROGRESS && (err != -EBUSY || @@ -499,56 +452,58 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req); - const struct rsa_asn1_template *digest_info; + struct akcipher_instance *inst = akcipher_alg_instance(tfm); + struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst); + const struct rsa_asn1_template *digest_info = ictx->digest_info; + unsigned int dst_len; unsigned int pos; - - if (err == -EOVERFLOW) - /* Decrypted value had no leading 0 byte */ - err = -EINVAL; + u8 *out_buf; if (err) goto done; - if (req_ctx->child_req.dst_len != ctx->key_size - 1) { - err = -EINVAL; + err = -EINVAL; + dst_len = req_ctx->child_req.dst_len; + if (dst_len < ctx->key_size - 1) goto done; + + out_buf = req_ctx->out_buf; + if (dst_len == ctx->key_size) { + if (out_buf[0] != 0x00) + /* Decrypted value had no leading 0 byte */ + goto done; + + dst_len--; + out_buf++; } err = -EBADMSG; - if (req_ctx->out_buf[0] != 0x01) + if (out_buf[0] != 0x01) goto done; - for (pos = 1; pos < req_ctx->child_req.dst_len; pos++) - if (req_ctx->out_buf[pos] != 0xff) + for (pos = 1; pos < dst_len; pos++) + if (out_buf[pos] != 0xff) break; - if (pos < 9 || pos == req_ctx->child_req.dst_len || - req_ctx->out_buf[pos] != 0x00) + if (pos < 9 || pos == dst_len || out_buf[pos] != 0x00) goto done; pos++; - if (ctx->hash_name) { - digest_info = rsa_lookup_asn1(ctx->hash_name); - if (!digest_info) - goto done; - - if (memcmp(req_ctx->out_buf + pos, digest_info->data, - digest_info->size)) - goto done; + if (memcmp(out_buf + pos, digest_info->data, digest_info->size)) + goto done; - pos += digest_info->size; - } + pos += digest_info->size; err = 0; - if (req->dst_len < req_ctx->child_req.dst_len - pos) + if (req->dst_len < dst_len - pos) err = -EOVERFLOW; - req->dst_len = req_ctx->child_req.dst_len - pos; + req->dst_len = dst_len - pos; if (!err) sg_copy_from_buffer(req->dst, 
sg_nents_for_len(req->dst, req->dst_len), - req_ctx->out_buf + pos, req->dst_len); + out_buf + pos, req->dst_len); done: kzfree(req_ctx->out_buf); @@ -588,18 +543,7 @@ static int pkcs1pad_verify(struct akcipher_request *req) if (!ctx->key_size || req->src_len < ctx->key_size) return -EINVAL; - if (ctx->key_size > PAGE_SIZE) - return -ENOTSUPP; - - /* Reuse input buffer, output to a new buffer */ - req_ctx->child_req.src = req->src; - req_ctx->child_req.src_len = req->src_len; - req_ctx->child_req.dst = req_ctx->out_sg; - req_ctx->child_req.dst_len = ctx->key_size; - - req_ctx->out_buf = kmalloc(ctx->key_size, - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); + req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL); if (!req_ctx->out_buf) return -ENOMEM; @@ -610,6 +554,11 @@ static int pkcs1pad_verify(struct akcipher_request *req) akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, pkcs1pad_verify_complete_cb, req); + /* Reuse input buffer, output to a new buffer */ + akcipher_request_set_crypt(&req_ctx->child_req, req->src, + req_ctx->out_sg, req->src_len, + ctx->key_size); + err = crypto_akcipher_verify(&req_ctx->child_req); if (err != -EINPROGRESS && (err != -EBUSY || @@ -626,12 +575,11 @@ static int pkcs1pad_init_tfm(struct crypto_akcipher *tfm) struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); struct crypto_akcipher *child_tfm; - child_tfm = crypto_spawn_akcipher(akcipher_instance_ctx(inst)); + child_tfm = crypto_spawn_akcipher(&ictx->spawn); if (IS_ERR(child_tfm)) return PTR_ERR(child_tfm); ctx->child = child_tfm; - ctx->hash_name = ictx->hash_name; return 0; } @@ -648,12 +596,12 @@ static void pkcs1pad_free(struct akcipher_instance *inst) struct crypto_akcipher_spawn *spawn = &ctx->spawn; crypto_drop_akcipher(spawn); - kfree(ctx->hash_name); kfree(inst); } static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) { + const struct rsa_asn1_template *digest_info; struct crypto_attr_type *algt; struct akcipher_instance *inst; struct pkcs1pad_inst_ctx *ctx; @@ -676,7 +624,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) hash_name = crypto_attr_alg_name(tb[2]); if (IS_ERR(hash_name)) - hash_name = NULL; + return PTR_ERR(hash_name); + + digest_info = rsa_lookup_asn1(hash_name); + if (!digest_info) + return -EINVAL; inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) @@ -684,7 +636,7 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) ctx = akcipher_instance_ctx(inst); spawn = &ctx->spawn; - ctx->hash_name = hash_name ? 
kstrdup(hash_name, GFP_KERNEL) : NULL; + ctx->digest_info = digest_info; crypto_set_spawn(&spawn->base, akcipher_crypto_instance(inst)); err = crypto_grab_akcipher(spawn, rsa_alg_name, 0, @@ -696,27 +648,14 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) err = -ENAMETOOLONG; - if (!hash_name) { - if (snprintf(inst->alg.base.cra_name, - CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", - rsa_alg->base.cra_name) >= - CRYPTO_MAX_ALG_NAME || - snprintf(inst->alg.base.cra_driver_name, - CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", - rsa_alg->base.cra_driver_name) >= - CRYPTO_MAX_ALG_NAME) + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "pkcs1pad(%s,%s)", rsa_alg->base.cra_name, hash_name) >= + CRYPTO_MAX_ALG_NAME || + snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "pkcs1pad(%s,%s)", + rsa_alg->base.cra_driver_name, hash_name) >= + CRYPTO_MAX_ALG_NAME) goto out_drop_alg; - } else { - if (snprintf(inst->alg.base.cra_name, - CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)", - rsa_alg->base.cra_name, hash_name) >= - CRYPTO_MAX_ALG_NAME || - snprintf(inst->alg.base.cra_driver_name, - CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)", - rsa_alg->base.cra_driver_name, hash_name) >= - CRYPTO_MAX_ALG_NAME) - goto out_free_hash; - } inst->alg.base.cra_flags = rsa_alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = rsa_alg->base.cra_priority; @@ -738,12 +677,10 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) err = akcipher_register_instance(tmpl, inst); if (err) - goto out_free_hash; + goto out_drop_alg; return 0; -out_free_hash: - kfree(ctx->hash_name); out_drop_alg: crypto_drop_akcipher(spawn); out_free_inst: diff --git a/crypto/rsa.c b/crypto/rsa.c index 77d737f52147..4c280b6a3ea9 100644 --- a/crypto/rsa.c +++ b/crypto/rsa.c @@ -10,16 +10,23 @@ */ #include +#include #include #include #include #include +struct rsa_mpi_key { + MPI n; + MPI e; + MPI d; +}; + /* * RSAEP function [RFC3447 sec 5.1.1] * c = m^e mod n; */ -static int _rsa_enc(const struct rsa_key *key, MPI c, MPI m) +static int _rsa_enc(const struct rsa_mpi_key *key, MPI c, MPI m) { /* (1) Validate 0 <= m < n */ if (mpi_cmp_ui(m, 0) < 0 || mpi_cmp(m, key->n) >= 0) @@ -33,7 +40,7 @@ static int _rsa_enc(const struct rsa_key *key, MPI c, MPI m) * RSADP function [RFC3447 sec 5.1.2] * m = c^d mod n; */ -static int _rsa_dec(const struct rsa_key *key, MPI m, MPI c) +static int _rsa_dec(const struct rsa_mpi_key *key, MPI m, MPI c) { /* (1) Validate 0 <= c < n */ if (mpi_cmp_ui(c, 0) < 0 || mpi_cmp(c, key->n) >= 0) @@ -47,7 +54,7 @@ static int _rsa_dec(const struct rsa_key *key, MPI m, MPI c) * RSASP1 function [RFC3447 sec 5.2.1] * s = m^d mod n */ -static int _rsa_sign(const struct rsa_key *key, MPI s, MPI m) +static int _rsa_sign(const struct rsa_mpi_key *key, MPI s, MPI m) { /* (1) Validate 0 <= m < n */ if (mpi_cmp_ui(m, 0) < 0 || mpi_cmp(m, key->n) >= 0) @@ -61,7 +68,7 @@ static int _rsa_sign(const struct rsa_key *key, MPI s, MPI m) * RSAVP1 function [RFC3447 sec 5.2.2] * m = s^e mod n; */ -static int _rsa_verify(const struct rsa_key *key, MPI m, MPI s) +static int _rsa_verify(const struct rsa_mpi_key *key, MPI m, MPI s) { /* (1) Validate 0 <= s < n */ if (mpi_cmp_ui(s, 0) < 0 || mpi_cmp(s, key->n) >= 0) @@ -71,7 +78,7 @@ static int _rsa_verify(const struct rsa_key *key, MPI m, MPI s) return mpi_powm(m, s, key->e, key->n); } -static inline struct rsa_key *rsa_get_key(struct crypto_akcipher *tfm) +static inline struct rsa_mpi_key *rsa_get_key(struct crypto_akcipher *tfm) { 
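/*
 * Illustration (toy numbers, not from the patch or any test vector): the
 * four primitives above are plain modular exponentiation over MPIs. With
 * n = 33, e = 3, d = 7, RSAEP computes c = 4^3 mod 33 = 31 and RSADP
 * recovers m = 31^7 mod 33 = 4; mpi_powm() performs exactly this
 * m^e mod n (or c^d mod n) arithmetic on arbitrary-precision integers.
 */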
return akcipher_tfm_ctx(tfm); } @@ -79,7 +86,7 @@ static inline struct rsa_key *rsa_get_key(struct crypto_akcipher *tfm) static int rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - const struct rsa_key *pkey = rsa_get_key(tfm); + const struct rsa_mpi_key *pkey = rsa_get_key(tfm); MPI m, c = mpi_alloc(0); int ret = 0; int sign; @@ -101,7 +108,7 @@ static int rsa_enc(struct akcipher_request *req) if (ret) goto err_free_m; - ret = mpi_write_to_sgl(c, req->dst, &req->dst_len, &sign); + ret = mpi_write_to_sgl(c, req->dst, req->dst_len, &sign); if (ret) goto err_free_m; @@ -118,7 +125,7 @@ err_free_c: static int rsa_dec(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - const struct rsa_key *pkey = rsa_get_key(tfm); + const struct rsa_mpi_key *pkey = rsa_get_key(tfm); MPI c, m = mpi_alloc(0); int ret = 0; int sign; @@ -140,7 +147,7 @@ static int rsa_dec(struct akcipher_request *req) if (ret) goto err_free_c; - ret = mpi_write_to_sgl(m, req->dst, &req->dst_len, &sign); + ret = mpi_write_to_sgl(m, req->dst, req->dst_len, &sign); if (ret) goto err_free_c; @@ -156,7 +163,7 @@ err_free_m: static int rsa_sign(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - const struct rsa_key *pkey = rsa_get_key(tfm); + const struct rsa_mpi_key *pkey = rsa_get_key(tfm); MPI m, s = mpi_alloc(0); int ret = 0; int sign; @@ -178,7 +185,7 @@ static int rsa_sign(struct akcipher_request *req) if (ret) goto err_free_m; - ret = mpi_write_to_sgl(s, req->dst, &req->dst_len, &sign); + ret = mpi_write_to_sgl(s, req->dst, req->dst_len, &sign); if (ret) goto err_free_m; @@ -195,7 +202,7 @@ err_free_s: static int rsa_verify(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - const struct rsa_key *pkey = rsa_get_key(tfm); + const struct rsa_mpi_key *pkey = rsa_get_key(tfm); MPI s, m = mpi_alloc(0); int ret = 0; int sign; @@ -219,7 +226,7 @@ static int rsa_verify(struct akcipher_request *req) if (ret) goto err_free_s; - ret = mpi_write_to_sgl(m, req->dst, &req->dst_len, &sign); + ret = mpi_write_to_sgl(m, req->dst, req->dst_len, &sign); if (ret) goto err_free_s; @@ -233,6 +240,16 @@ err_free_m: return ret; } +static void rsa_free_mpi_key(struct rsa_mpi_key *key) +{ + mpi_free(key->d); + mpi_free(key->e); + mpi_free(key->n); + key->d = NULL; + key->e = NULL; + key->n = NULL; +} + static int rsa_check_key_length(unsigned int len) { switch (len) { @@ -251,49 +268,87 @@ static int rsa_check_key_length(unsigned int len) static int rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { - struct rsa_key *pkey = akcipher_tfm_ctx(tfm); + struct rsa_mpi_key *mpi_key = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; int ret; - ret = rsa_parse_pub_key(pkey, key, keylen); + /* Free the old MPI key if any */ + rsa_free_mpi_key(mpi_key); + + ret = rsa_parse_pub_key(&raw_key, key, keylen); if (ret) return ret; - if (rsa_check_key_length(mpi_get_size(pkey->n) << 3)) { - rsa_free_key(pkey); - ret = -EINVAL; + mpi_key->e = mpi_read_raw_data(raw_key.e, raw_key.e_sz); + if (!mpi_key->e) + goto err; + + mpi_key->n = mpi_read_raw_data(raw_key.n, raw_key.n_sz); + if (!mpi_key->n) + goto err; + + if (rsa_check_key_length(mpi_get_size(mpi_key->n) << 3)) { + rsa_free_mpi_key(mpi_key); + return -EINVAL; } - return ret; + + return 0; + +err: + rsa_free_mpi_key(mpi_key); + return -ENOMEM; } static int rsa_set_priv_key(struct crypto_akcipher *tfm, 
const void *key, unsigned int keylen) { - struct rsa_key *pkey = akcipher_tfm_ctx(tfm); + struct rsa_mpi_key *mpi_key = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; int ret; - ret = rsa_parse_priv_key(pkey, key, keylen); + /* Free the old MPI key if any */ + rsa_free_mpi_key(mpi_key); + + ret = rsa_parse_priv_key(&raw_key, key, keylen); if (ret) return ret; - if (rsa_check_key_length(mpi_get_size(pkey->n) << 3)) { - rsa_free_key(pkey); - ret = -EINVAL; + mpi_key->d = mpi_read_raw_data(raw_key.d, raw_key.d_sz); + if (!mpi_key->d) + goto err; + + mpi_key->e = mpi_read_raw_data(raw_key.e, raw_key.e_sz); + if (!mpi_key->e) + goto err; + + mpi_key->n = mpi_read_raw_data(raw_key.n, raw_key.n_sz); + if (!mpi_key->n) + goto err; + + if (rsa_check_key_length(mpi_get_size(mpi_key->n) << 3)) { + rsa_free_mpi_key(mpi_key); + return -EINVAL; } - return ret; + + return 0; + +err: + rsa_free_mpi_key(mpi_key); + return -ENOMEM; } static int rsa_max_size(struct crypto_akcipher *tfm) { - struct rsa_key *pkey = akcipher_tfm_ctx(tfm); + struct rsa_mpi_key *pkey = akcipher_tfm_ctx(tfm); return pkey->n ? mpi_get_size(pkey->n) : -EINVAL; } static void rsa_exit_tfm(struct crypto_akcipher *tfm) { - struct rsa_key *pkey = akcipher_tfm_ctx(tfm); + struct rsa_mpi_key *pkey = akcipher_tfm_ctx(tfm); - rsa_free_key(pkey); + rsa_free_mpi_key(pkey); } static struct akcipher_alg rsa = { @@ -310,7 +365,7 @@ static struct akcipher_alg rsa = { .cra_driver_name = "rsa-generic", .cra_priority = 100, .cra_module = THIS_MODULE, - .cra_ctxsize = sizeof(struct rsa_key), + .cra_ctxsize = sizeof(struct rsa_mpi_key), }, }; diff --git a/crypto/rsa_helper.c b/crypto/rsa_helper.c index d226f48d0907..4df6451e7543 100644 --- a/crypto/rsa_helper.c +++ b/crypto/rsa_helper.c @@ -22,20 +22,29 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct rsa_key *key = context; + const u8 *ptr = value; + size_t n_sz = vlen; - key->n = mpi_read_raw_data(value, vlen); - - if (!key->n) - return -ENOMEM; - - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (mpi_get_size(key->n) != 256 && - mpi_get_size(key->n) != 384)) { - pr_err("RSA: key size not allowed in FIPS mode\n"); - mpi_free(key->n); - key->n = NULL; + /* invalid key provided */ + if (!value || !vlen) return -EINVAL; + + if (fips_enabled) { + while (!*ptr && n_sz) { + ptr++; + n_sz--; + } + + /* In FIPS mode only allow key size 2K & 3K */ + if (n_sz != 256 && n_sz != 384) { + pr_err("RSA: key size not allowed in FIPS mode\n"); + return -EINVAL; + } } + + key->n = value; + key->n_sz = vlen; + return 0; } @@ -44,10 +53,12 @@ int rsa_get_e(void *context, size_t hdrlen, unsigned char tag, { struct rsa_key *key = context; - key->e = mpi_read_raw_data(value, vlen); + /* invalid key provided */ + if (!value || !key->n_sz || !vlen || vlen > key->n_sz) + return -EINVAL; - if (!key->e) - return -ENOMEM; + key->e = value; + key->e_sz = vlen; return 0; } @@ -57,46 +68,95 @@ int rsa_get_d(void *context, size_t hdrlen, unsigned char tag, { struct rsa_key *key = context; - key->d = mpi_read_raw_data(value, vlen); + /* invalid key provided */ + if (!value || !key->n_sz || !vlen || vlen > key->n_sz) + return -EINVAL; - if (!key->d) - return -ENOMEM; + key->d = value; + key->d_sz = vlen; - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (mpi_get_size(key->d) != 256 && - mpi_get_size(key->d) != 384)) { - pr_err("RSA: key size not allowed in FIPS mode\n"); - mpi_free(key->d); - key->d = NULL; + return 0; +} + +int 
rsa_get_p(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) +{ + struct rsa_key *key = context; + + /* invalid key provided */ + if (!value || !vlen || vlen > key->n_sz) return -EINVAL; - } + + key->p = value; + key->p_sz = vlen; + return 0; } -static void free_mpis(struct rsa_key *key) +int rsa_get_q(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) { - mpi_free(key->n); - mpi_free(key->e); - mpi_free(key->d); - key->n = NULL; - key->e = NULL; - key->d = NULL; + struct rsa_key *key = context; + + /* invalid key provided */ + if (!value || !vlen || vlen > key->n_sz) + return -EINVAL; + + key->q = value; + key->q_sz = vlen; + + return 0; } -/** - * rsa_free_key() - frees rsa key allocated by rsa_parse_key() - * - * @rsa_key: struct rsa_key key representation - */ -void rsa_free_key(struct rsa_key *key) +int rsa_get_dp(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) +{ + struct rsa_key *key = context; + + /* invalid key provided */ + if (!value || !vlen || vlen > key->n_sz) + return -EINVAL; + + key->dp = value; + key->dp_sz = vlen; + + return 0; +} + +int rsa_get_dq(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) { - free_mpis(key); + struct rsa_key *key = context; + + /* invalid key provided */ + if (!value || !vlen || vlen > key->n_sz) + return -EINVAL; + + key->dq = value; + key->dq_sz = vlen; + + return 0; +} + +int rsa_get_qinv(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) +{ + struct rsa_key *key = context; + + /* invalid key provided */ + if (!value || !vlen || vlen > key->n_sz) + return -EINVAL; + + key->qinv = value; + key->qinv_sz = vlen; + + return 0; } -EXPORT_SYMBOL_GPL(rsa_free_key); /** - * rsa_parse_pub_key() - extracts an rsa public key from BER encoded buffer - * and stores it in the provided struct rsa_key + * rsa_parse_pub_key() - decodes the BER encoded buffer and stores in the + * provided struct rsa_key, pointers to the raw key as is, + * so that the caller can copy it or MPI parse it, etc. * * @rsa_key: struct rsa_key key representation * @key: key in BER format @@ -107,23 +167,15 @@ EXPORT_SYMBOL_GPL(rsa_free_key); int rsa_parse_pub_key(struct rsa_key *rsa_key, const void *key, unsigned int key_len) { - int ret; - - free_mpis(rsa_key); - ret = asn1_ber_decoder(&rsapubkey_decoder, rsa_key, key, key_len); - if (ret < 0) - goto error; - - return 0; -error: - free_mpis(rsa_key); - return ret; + return asn1_ber_decoder(&rsapubkey_decoder, rsa_key, key, key_len); } EXPORT_SYMBOL_GPL(rsa_parse_pub_key); /** - * rsa_parse_pub_key() - extracts an rsa private key from BER encoded buffer - * and stores it in the provided struct rsa_key + * rsa_parse_priv_key() - decodes the BER encoded buffer and stores in the + * provided struct rsa_key, pointers to the raw key + * as is, so that the caller can copy it or MPI parse it, + * etc. 
* * @rsa_key: struct rsa_key key representation * @key: key in BER format @@ -134,16 +186,6 @@ EXPORT_SYMBOL_GPL(rsa_parse_pub_key); int rsa_parse_priv_key(struct rsa_key *rsa_key, const void *key, unsigned int key_len) { - int ret; - - free_mpis(rsa_key); - ret = asn1_ber_decoder(&rsaprivkey_decoder, rsa_key, key, key_len); - if (ret < 0) - goto error; - - return 0; -error: - free_mpis(rsa_key); - return ret; + return asn1_ber_decoder(&rsaprivkey_decoder, rsa_key, key, key_len); } EXPORT_SYMBOL_GPL(rsa_parse_priv_key); diff --git a/crypto/rsaprivkey.asn1 b/crypto/rsaprivkey.asn1 index 731aea5edb0c..4ce06758e8af 100644 --- a/crypto/rsaprivkey.asn1 +++ b/crypto/rsaprivkey.asn1 @@ -3,9 +3,9 @@ RsaPrivKey ::= SEQUENCE { n INTEGER ({ rsa_get_n }), e INTEGER ({ rsa_get_e }), d INTEGER ({ rsa_get_d }), - prime1 INTEGER, - prime2 INTEGER, - exponent1 INTEGER, - exponent2 INTEGER, - coefficient INTEGER + prime1 INTEGER ({ rsa_get_p }), + prime2 INTEGER ({ rsa_get_q }), + exponent1 INTEGER ({ rsa_get_dp }), + exponent2 INTEGER ({ rsa_get_dq }), + coefficient INTEGER ({ rsa_get_qinv }) } diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index ea5815c5e128..52ce17a3dd63 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -18,8 +18,6 @@ #include #include #include -#include -#include #include static inline void memcpy_dir(void *buf, void *sgdata, size_t nbytes, int out) @@ -30,53 +28,6 @@ static inline void memcpy_dir(void *buf, void *sgdata, size_t nbytes, int out) memcpy(dst, src, nbytes); } -void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg) -{ - walk->sg = sg; - - BUG_ON(!sg->length); - - walk->offset = sg->offset; -} -EXPORT_SYMBOL_GPL(scatterwalk_start); - -void *scatterwalk_map(struct scatter_walk *walk) -{ - return kmap_atomic(scatterwalk_page(walk)) + - offset_in_page(walk->offset); -} -EXPORT_SYMBOL_GPL(scatterwalk_map); - -static void scatterwalk_pagedone(struct scatter_walk *walk, int out, - unsigned int more) -{ - if (out) { - struct page *page; - - page = sg_page(walk->sg) + ((walk->offset - 1) >> PAGE_SHIFT); - /* Test ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE first as - * PageSlab cannot be optimised away per se due to - * use of volatile pointer. 
- */ - if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE && !PageSlab(page)) - flush_dcache_page(page); - } - - if (more) { - walk->offset += PAGE_SIZE - 1; - walk->offset &= PAGE_MASK; - if (walk->offset >= walk->sg->offset + walk->sg->length) - scatterwalk_start(walk, sg_next(walk->sg)); - } -} - -void scatterwalk_done(struct scatter_walk *walk, int out, int more) -{ - if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more) - scatterwalk_pagedone(walk, out, more); -} -EXPORT_SYMBOL_GPL(scatterwalk_done); - void scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out) { @@ -87,9 +38,11 @@ void scatterwalk_copychunks(void *buf, struct scatter_walk *walk, if (len_this_page > nbytes) len_this_page = nbytes; - vaddr = scatterwalk_map(walk); - memcpy_dir(buf, vaddr, len_this_page, out); - scatterwalk_unmap(vaddr); + if (out != 2) { + vaddr = scatterwalk_map(walk); + memcpy_dir(buf, vaddr, len_this_page, out); + scatterwalk_unmap(vaddr); + } scatterwalk_advance(walk, len_this_page); @@ -99,7 +52,7 @@ void scatterwalk_copychunks(void *buf, struct scatter_walk *walk, buf += len_this_page; nbytes -= len_this_page; - scatterwalk_pagedone(walk, out, 1); + scatterwalk_pagedone(walk, out & 1, 1); } } EXPORT_SYMBOL_GPL(scatterwalk_copychunks); @@ -125,28 +78,6 @@ void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg, } EXPORT_SYMBOL_GPL(scatterwalk_map_and_copy); -int scatterwalk_bytes_sglen(struct scatterlist *sg, int num_bytes) -{ - int offset = 0, n = 0; - - /* num_bytes is too small */ - if (num_bytes < sg->length) - return -1; - - do { - offset += sg->length; - n++; - sg = sg_next(sg); - - /* num_bytes is too large */ - if (unlikely(!sg && (num_bytes < offset))) - return -1; - } while (sg && (num_bytes > offset)); - - return n; -} -EXPORT_SYMBOL_GPL(scatterwalk_bytes_sglen); - struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2], struct scatterlist *src, unsigned int len) diff --git a/crypto/seqiv.c b/crypto/seqiv.c index 15a749a5cab7..c7049231861f 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -14,50 +14,17 @@ */ #include -#include -#include #include +#include #include #include #include #include #include -#include #include -struct seqiv_ctx { - spinlock_t lock; - u8 salt[] __attribute__ ((aligned(__alignof__(u32)))); -}; - static void seqiv_free(struct crypto_instance *inst); -static void seqiv_complete2(struct skcipher_givcrypt_request *req, int err) -{ - struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req); - struct crypto_ablkcipher *geniv; - - if (err == -EINPROGRESS) - return; - - if (err) - goto out; - - geniv = skcipher_givcrypt_reqtfm(req); - memcpy(req->creq.info, subreq->info, crypto_ablkcipher_ivsize(geniv)); - -out: - kfree(subreq->info); -} - -static void seqiv_complete(struct crypto_async_request *base, int err) -{ - struct skcipher_givcrypt_request *req = base->data; - - seqiv_complete2(req, err); - skcipher_givcrypt_complete(req, err); -} - static void seqiv_aead_encrypt_complete2(struct aead_request *req, int err) { struct aead_request *subreq = aead_request_ctx(req); @@ -85,65 +52,6 @@ static void seqiv_aead_encrypt_complete(struct crypto_async_request *base, aead_request_complete(req, err); } -static void seqiv_geniv(struct seqiv_ctx *ctx, u8 *info, u64 seq, - unsigned int ivsize) -{ - unsigned int len = ivsize; - - if (ivsize > sizeof(u64)) { - memset(info, 0, ivsize - sizeof(u64)); - len = sizeof(u64); - } - seq = cpu_to_be64(seq); - memcpy(info + ivsize - len, &seq, len); - crypto_xor(info, ctx->salt, ivsize); -} - 
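/*
 * For reference (illustrative note, not part of the patch): the deleted
 * seqiv_geniv() above derived each IV as salt XOR big-endian sequence
 * number, zero-padding IVs wider than 8 bytes. E.g. with ivsize = 8 and
 * salt = 0x0f0f0f0f0f0f0f0f, seq = 1 gives IV = 0x0f0f0f0f0f0f0f0e. The
 * surviving AEAD geniv path keeps the same salt-XOR-sequence construction.
 */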
-static int seqiv_givencrypt(struct skcipher_givcrypt_request *req) -{ - struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); - struct seqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req); - crypto_completion_t compl; - void *data; - u8 *info; - unsigned int ivsize; - int err; - - ablkcipher_request_set_tfm(subreq, skcipher_geniv_cipher(geniv)); - - compl = req->creq.base.complete; - data = req->creq.base.data; - info = req->creq.info; - - ivsize = crypto_ablkcipher_ivsize(geniv); - - if (unlikely(!IS_ALIGNED((unsigned long)info, - crypto_ablkcipher_alignmask(geniv) + 1))) { - info = kmalloc(ivsize, req->creq.base.flags & - CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL: - GFP_ATOMIC); - if (!info) - return -ENOMEM; - - compl = seqiv_complete; - data = req; - } - - ablkcipher_request_set_callback(subreq, req->creq.base.flags, compl, - data); - ablkcipher_request_set_crypt(subreq, req->creq.src, req->creq.dst, - req->creq.nbytes, info); - - seqiv_geniv(ctx, info, req->seq, ivsize); - memcpy(req->giv, info, ivsize); - - err = crypto_ablkcipher_encrypt(subreq); - if (unlikely(info != req->creq.info)) - seqiv_complete2(req, err); - return err; -} - static int seqiv_aead_encrypt(struct aead_request *req) { struct crypto_aead *geniv = crypto_aead_reqtfm(req); @@ -165,12 +73,16 @@ static int seqiv_aead_encrypt(struct aead_request *req) info = req->iv; if (req->src != req->dst) { - struct blkcipher_desc desc = { - .tfm = ctx->null, - }; + SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull); + + skcipher_request_set_tfm(nreq, ctx->sknull); + skcipher_request_set_callback(nreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(nreq, req->src, req->dst, + req->assoclen + req->cryptlen, + NULL); - err = crypto_blkcipher_encrypt(&desc, req->dst, req->src, - req->assoclen + req->cryptlen); + err = crypto_skcipher_encrypt(nreq); if (err) return err; } @@ -229,62 +141,6 @@ static int seqiv_aead_decrypt(struct aead_request *req) return crypto_aead_decrypt(subreq); } -static int seqiv_init(struct crypto_tfm *tfm) -{ - struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm); - struct seqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); - int err; - - spin_lock_init(&ctx->lock); - - tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request); - - err = 0; - if (!crypto_get_default_rng()) { - crypto_ablkcipher_crt(geniv)->givencrypt = seqiv_givencrypt; - err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt, - crypto_ablkcipher_ivsize(geniv)); - crypto_put_default_rng(); - } - - return err ?: skcipher_geniv_init(tfm); -} - -static int seqiv_ablkcipher_create(struct crypto_template *tmpl, - struct rtattr **tb) -{ - struct crypto_instance *inst; - int err; - - inst = skcipher_geniv_alloc(tmpl, tb, 0, 0); - - if (IS_ERR(inst)) - return PTR_ERR(inst); - - err = -EINVAL; - if (inst->alg.cra_ablkcipher.ivsize < sizeof(u64)) - goto free_inst; - - inst->alg.cra_init = seqiv_init; - inst->alg.cra_exit = skcipher_geniv_exit; - - inst->alg.cra_ctxsize += inst->alg.cra_ablkcipher.ivsize; - inst->alg.cra_ctxsize += sizeof(struct seqiv_ctx); - - inst->alg.cra_alignmask |= __alignof__(u32) - 1; - - err = crypto_register_instance(tmpl, inst); - if (err) - goto free_inst; - -out: - return err; - -free_inst: - skcipher_geniv_free(inst); - goto out; -} - static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb) { struct aead_instance *inst; @@ -330,26 +186,20 @@ free_inst: static int seqiv_create(struct crypto_template *tmpl, 
struct rtattr **tb) { struct crypto_attr_type *algt; - int err; algt = crypto_get_attr_type(tb); if (IS_ERR(algt)) return PTR_ERR(algt); if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & CRYPTO_ALG_TYPE_MASK) - err = seqiv_ablkcipher_create(tmpl, tb); - else - err = seqiv_aead_create(tmpl, tb); + return -EINVAL; - return err; + return seqiv_aead_create(tmpl, tb); } static void seqiv_free(struct crypto_instance *inst) { - if ((inst->alg.cra_flags ^ CRYPTO_ALG_TYPE_AEAD) & CRYPTO_ALG_TYPE_MASK) - skcipher_geniv_free(inst); - else - aead_geniv_free(aead_instance(inst)); + aead_geniv_free(aead_instance(inst)); } static struct crypto_template seqiv_tmpl = { diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c new file mode 100644 index 000000000000..62264397a2d2 --- /dev/null +++ b/crypto/sha3_generic.c @@ -0,0 +1,300 @@ +/* + * Cryptographic API. + * + * SHA-3, as specified in + * http://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf + * + * SHA-3 code by Jeff Garzik + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#include +#include +#include +#include +#include +#include + +#define KECCAK_ROUNDS 24 + +#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) + +static const u64 keccakf_rndc[24] = { + 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, + 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, + 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, + 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, + 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, + 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, + 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 +}; + +static const int keccakf_rotc[24] = { + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 +}; + +static const int keccakf_piln[24] = { + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 +}; + +/* update the state with given number of rounds */ + +static void keccakf(u64 st[25]) +{ + int i, j, round; + u64 t, bc[5]; + + for (round = 0; round < KECCAK_ROUNDS; round++) { + + /* Theta */ + for (i = 0; i < 5; i++) + bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] + ^ st[i + 20]; + + for (i = 0; i < 5; i++) { + t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); + for (j = 0; j < 25; j += 5) + st[j + i] ^= t; + } + + /* Rho Pi */ + t = st[1]; + for (i = 0; i < 24; i++) { + j = keccakf_piln[i]; + bc[0] = st[j]; + st[j] = ROTL64(t, keccakf_rotc[i]); + t = bc[0]; + } + + /* Chi */ + for (j = 0; j < 25; j += 5) { + for (i = 0; i < 5; i++) + bc[i] = st[j + i]; + for (i = 0; i < 5; i++) + st[j + i] ^= (~bc[(i + 1) % 5]) & + bc[(i + 2) % 5]; + } + + /* Iota */ + st[0] ^= keccakf_rndc[round]; + } +} + +static void sha3_init(struct sha3_state *sctx, unsigned int digest_sz) +{ + memset(sctx, 0, sizeof(*sctx)); + sctx->md_len = digest_sz; + sctx->rsiz = 200 - 2 * digest_sz; + sctx->rsizw = sctx->rsiz / 8; +} + +static int sha3_224_init(struct shash_desc *desc) +{ + struct sha3_state *sctx = shash_desc_ctx(desc); + + sha3_init(sctx, SHA3_224_DIGEST_SIZE); + return 0; +} + +static int sha3_256_init(struct shash_desc *desc) +{ + struct sha3_state *sctx = shash_desc_ctx(desc); + + sha3_init(sctx, SHA3_256_DIGEST_SIZE); +
return 0; +} + +static int sha3_384_init(struct shash_desc *desc) +{ + struct sha3_state *sctx = shash_desc_ctx(desc); + + sha3_init(sctx, SHA3_384_DIGEST_SIZE); + return 0; +} + +static int sha3_512_init(struct shash_desc *desc) +{ + struct sha3_state *sctx = shash_desc_ctx(desc); + + sha3_init(sctx, SHA3_512_DIGEST_SIZE); + return 0; +} + +static int sha3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha3_state *sctx = shash_desc_ctx(desc); + unsigned int done; + const u8 *src; + + done = 0; + src = data; + + if ((sctx->partial + len) > (sctx->rsiz - 1)) { + if (sctx->partial) { + done = -sctx->partial; + memcpy(sctx->buf + sctx->partial, data, + done + sctx->rsiz); + src = sctx->buf; + } + + do { + unsigned int i; + + for (i = 0; i < sctx->rsizw; i++) + sctx->st[i] ^= ((u64 *) src)[i]; + keccakf(sctx->st); + + done += sctx->rsiz; + src = data + done; + } while (done + (sctx->rsiz - 1) < len); + + sctx->partial = 0; + } + memcpy(sctx->buf + sctx->partial, src, len - done); + sctx->partial += (len - done); + + return 0; +} + +static int sha3_final(struct shash_desc *desc, u8 *out) +{ + struct sha3_state *sctx = shash_desc_ctx(desc); + unsigned int i, inlen = sctx->partial; + + sctx->buf[inlen++] = 0x06; + memset(sctx->buf + inlen, 0, sctx->rsiz - inlen); + sctx->buf[sctx->rsiz - 1] |= 0x80; + + for (i = 0; i < sctx->rsizw; i++) + sctx->st[i] ^= ((u64 *) sctx->buf)[i]; + + keccakf(sctx->st); + + for (i = 0; i < sctx->rsizw; i++) + sctx->st[i] = cpu_to_le64(sctx->st[i]); + + memcpy(out, sctx->st, sctx->md_len); + + memset(sctx, 0, sizeof(*sctx)); + return 0; +} + +static struct shash_alg sha3_224 = { + .digestsize = SHA3_224_DIGEST_SIZE, + .init = sha3_224_init, + .update = sha3_update, + .final = sha3_final, + .descsize = sizeof(struct sha3_state), + .base = { + .cra_name = "sha3-224", + .cra_driver_name = "sha3-224-generic", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA3_224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha3_256 = { + .digestsize = SHA3_256_DIGEST_SIZE, + .init = sha3_256_init, + .update = sha3_update, + .final = sha3_final, + .descsize = sizeof(struct sha3_state), + .base = { + .cra_name = "sha3-256", + .cra_driver_name = "sha3-256-generic", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA3_256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha3_384 = { + .digestsize = SHA3_384_DIGEST_SIZE, + .init = sha3_384_init, + .update = sha3_update, + .final = sha3_final, + .descsize = sizeof(struct sha3_state), + .base = { + .cra_name = "sha3-384", + .cra_driver_name = "sha3-384-generic", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA3_384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha3_512 = { + .digestsize = SHA3_512_DIGEST_SIZE, + .init = sha3_512_init, + .update = sha3_update, + .final = sha3_final, + .descsize = sizeof(struct sha3_state), + .base = { + .cra_name = "sha3-512", + .cra_driver_name = "sha3-512-generic", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA3_512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sha3_generic_mod_init(void) +{ + int ret; + + ret = crypto_register_shash(&sha3_224); + if (ret < 0) + goto err_out; + ret = crypto_register_shash(&sha3_256); + if (ret < 0) + goto err_out_224; + ret = crypto_register_shash(&sha3_384); + if (ret < 0) + goto err_out_256; + ret = crypto_register_shash(&sha3_512); + if (ret < 0) + goto err_out_384; + + return 0; + 
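/*
 * Two notes on the code above (added for clarity, not part of the patch):
 * sha3_init() sizes the sponge as rate = 200 - 2 * digest_size, so for
 * SHA3-256 rsiz = 200 - 2*32 = 136 bytes (an 1088-bit rate, leaving 512
 * bits of capacity in the 1600-bit state) and rsizw = 136/8 = 17 64-bit
 * words. The error labels below unwind registration in reverse order and
 * fall through: a failure registering sha3-512 lands at err_out_384, which
 * unregisters sha3-384, then sha3-256, then sha3-224.
 */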
+err_out_384: + crypto_unregister_shash(&sha3_384); +err_out_256: + crypto_unregister_shash(&sha3_256); +err_out_224: + crypto_unregister_shash(&sha3_224); +err_out: + return ret; +} + +static void __exit sha3_generic_mod_fini(void) +{ + crypto_unregister_shash(&sha3_224); + crypto_unregister_shash(&sha3_256); + crypto_unregister_shash(&sha3_384); + crypto_unregister_shash(&sha3_512); +} + +module_init(sha3_generic_mod_init); +module_exit(sha3_generic_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-3 Secure Hash Algorithm"); + +MODULE_ALIAS_CRYPTO("sha3-224"); +MODULE_ALIAS_CRYPTO("sha3-224-generic"); +MODULE_ALIAS_CRYPTO("sha3-256"); +MODULE_ALIAS_CRYPTO("sha3-256-generic"); +MODULE_ALIAS_CRYPTO("sha3-384"); +MODULE_ALIAS_CRYPTO("sha3-384-generic"); +MODULE_ALIAS_CRYPTO("sha3-512"); +MODULE_ALIAS_CRYPTO("sha3-512-generic"); diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 69230e9d4ac9..f7d0018dcaee 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -16,7 +16,11 @@ #include #include +#include #include +#include +#include +#include #include "internal.h" @@ -25,10 +29,11 @@ static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg) if (alg->cra_type == &crypto_blkcipher_type) return sizeof(struct crypto_blkcipher *); - BUG_ON(alg->cra_type != &crypto_ablkcipher_type && - alg->cra_type != &crypto_givcipher_type); + if (alg->cra_type == &crypto_ablkcipher_type || + alg->cra_type == &crypto_givcipher_type) + return sizeof(struct crypto_ablkcipher *); - return sizeof(struct crypto_ablkcipher *); + return crypto_alg_extsize(alg); } static int skcipher_setkey_blkcipher(struct crypto_skcipher *tfm, @@ -216,26 +221,118 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm) return 0; } +static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm); + struct skcipher_alg *alg = crypto_skcipher_alg(skcipher); + + alg->exit(skcipher); +} + static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm) { + struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm); + struct skcipher_alg *alg = crypto_skcipher_alg(skcipher); + if (tfm->__crt_alg->cra_type == &crypto_blkcipher_type) return crypto_init_skcipher_ops_blkcipher(tfm); - BUG_ON(tfm->__crt_alg->cra_type != &crypto_ablkcipher_type && - tfm->__crt_alg->cra_type != &crypto_givcipher_type); + if (tfm->__crt_alg->cra_type == &crypto_ablkcipher_type || + tfm->__crt_alg->cra_type == &crypto_givcipher_type) + return crypto_init_skcipher_ops_ablkcipher(tfm); + + skcipher->setkey = alg->setkey; + skcipher->encrypt = alg->encrypt; + skcipher->decrypt = alg->decrypt; + skcipher->ivsize = alg->ivsize; + skcipher->keysize = alg->max_keysize; + + if (alg->exit) + skcipher->base.exit = crypto_skcipher_exit_tfm; - return crypto_init_skcipher_ops_ablkcipher(tfm); + if (alg->init) + return alg->init(skcipher); + + return 0; +} + +static void crypto_skcipher_free_instance(struct crypto_instance *inst) +{ + struct skcipher_instance *skcipher = + container_of(inst, struct skcipher_instance, s.base); + + skcipher->free(skcipher); +} + +static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) + __attribute__ ((unused)); +static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) +{ + struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg, + base); + + seq_printf(m, "type : skcipher\n"); + seq_printf(m, "async : %s\n", + alg->cra_flags & CRYPTO_ALG_ASYNC ? 
"yes" : "no"); + seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); + seq_printf(m, "min keysize : %u\n", skcipher->min_keysize); + seq_printf(m, "max keysize : %u\n", skcipher->max_keysize); + seq_printf(m, "ivsize : %u\n", skcipher->ivsize); + seq_printf(m, "chunksize : %u\n", skcipher->chunksize); } +#ifdef CONFIG_NET +static int crypto_skcipher_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_blkcipher rblkcipher; + struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg, + base); + + strncpy(rblkcipher.type, "skcipher", sizeof(rblkcipher.type)); + strncpy(rblkcipher.geniv, "", sizeof(rblkcipher.geniv)); + + rblkcipher.blocksize = alg->cra_blocksize; + rblkcipher.min_keysize = skcipher->min_keysize; + rblkcipher.max_keysize = skcipher->max_keysize; + rblkcipher.ivsize = skcipher->ivsize; + + if (nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER, + sizeof(struct crypto_report_blkcipher), &rblkcipher)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +#else +static int crypto_skcipher_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + return -ENOSYS; +} +#endif + static const struct crypto_type crypto_skcipher_type2 = { .extsize = crypto_skcipher_extsize, .init_tfm = crypto_skcipher_init_tfm, + .free = crypto_skcipher_free_instance, +#ifdef CONFIG_PROC_FS + .show = crypto_skcipher_show, +#endif + .report = crypto_skcipher_report, .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_BLKCIPHER_MASK, - .type = CRYPTO_ALG_TYPE_BLKCIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .tfmsize = offsetof(struct crypto_skcipher, base), }; +int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, + const char *name, u32 type, u32 mask) +{ + spawn->base.frontend = &crypto_skcipher_type2; + return crypto_grab_spawn(&spawn->base, name, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_grab_skcipher); + struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, u32 type, u32 mask) { @@ -243,5 +340,90 @@ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, } EXPORT_SYMBOL_GPL(crypto_alloc_skcipher); +int crypto_has_skcipher2(const char *alg_name, u32 type, u32 mask) +{ + return crypto_type_has_alg(alg_name, &crypto_skcipher_type2, + type, mask); +} +EXPORT_SYMBOL_GPL(crypto_has_skcipher2); + +static int skcipher_prepare_alg(struct skcipher_alg *alg) +{ + struct crypto_alg *base = &alg->base; + + if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8) + return -EINVAL; + + if (!alg->chunksize) + alg->chunksize = base->cra_blocksize; + + base->cra_type = &crypto_skcipher_type2; + base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; + base->cra_flags |= CRYPTO_ALG_TYPE_SKCIPHER; + + return 0; +} + +int crypto_register_skcipher(struct skcipher_alg *alg) +{ + struct crypto_alg *base = &alg->base; + int err; + + err = skcipher_prepare_alg(alg); + if (err) + return err; + + return crypto_register_alg(base); +} +EXPORT_SYMBOL_GPL(crypto_register_skcipher); + +void crypto_unregister_skcipher(struct skcipher_alg *alg) +{ + crypto_unregister_alg(&alg->base); +} +EXPORT_SYMBOL_GPL(crypto_unregister_skcipher); + +int crypto_register_skciphers(struct skcipher_alg *algs, int count) +{ + int i, ret; + + for (i = 0; i < count; i++) { + ret = crypto_register_skcipher(&algs[i]); + if (ret) + goto err; + } + + return 0; + +err: + for (--i; i >= 0; --i) + crypto_unregister_skcipher(&algs[i]); + + return ret; +} +EXPORT_SYMBOL_GPL(crypto_register_skciphers); + +void crypto_unregister_skciphers(struct skcipher_alg *algs, int 
count) +{ + int i; + + for (i = count - 1; i >= 0; --i) + crypto_unregister_skcipher(&algs[i]); +} +EXPORT_SYMBOL_GPL(crypto_unregister_skciphers); + +int skcipher_register_instance(struct crypto_template *tmpl, + struct skcipher_instance *inst) +{ + int err; + + err = skcipher_prepare_alg(&inst->alg); + if (err) + return err; + + return crypto_register_instance(tmpl, skcipher_crypto_instance(inst)); +} +EXPORT_SYMBOL_GPL(skcipher_register_instance); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Symmetric key cipher type"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 579dce071463..ae22f05d5936 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -72,7 +73,8 @@ static char *check[] = { "cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", "khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt", "camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320", - "lzo", "cts", "zlib", NULL + "lzo", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384", "sha3-512", + NULL }; struct tcrypt_result { @@ -91,76 +93,6 @@ static void tcrypt_complete(struct crypto_async_request *req, int err) complete(&res->completion); } -static int test_cipher_jiffies(struct blkcipher_desc *desc, int enc, - struct scatterlist *sg, int blen, int secs) -{ - unsigned long start, end; - int bcount; - int ret; - - for (start = jiffies, end = start + secs * HZ, bcount = 0; - time_before(jiffies, end); bcount++) { - if (enc) - ret = crypto_blkcipher_encrypt(desc, sg, sg, blen); - else - ret = crypto_blkcipher_decrypt(desc, sg, sg, blen); - - if (ret) - return ret; - } - - printk("%d operations in %d seconds (%ld bytes)\n", - bcount, secs, (long)bcount * blen); - return 0; -} - -static int test_cipher_cycles(struct blkcipher_desc *desc, int enc, - struct scatterlist *sg, int blen) -{ - unsigned long cycles = 0; - int ret = 0; - int i; - - local_irq_disable(); - - /* Warm-up run. */ - for (i = 0; i < 4; i++) { - if (enc) - ret = crypto_blkcipher_encrypt(desc, sg, sg, blen); - else - ret = crypto_blkcipher_decrypt(desc, sg, sg, blen); - - if (ret) - goto out; - } - - /* The real thing. 
*/ - for (i = 0; i < 8; i++) { - cycles_t start, end; - - start = get_cycles(); - if (enc) - ret = crypto_blkcipher_encrypt(desc, sg, sg, blen); - else - ret = crypto_blkcipher_decrypt(desc, sg, sg, blen); - end = get_cycles(); - - if (ret) - goto out; - - cycles += end - start; - } - -out: - local_irq_enable(); - - if (ret == 0) - printk("1 operation in %lu cycles (%d bytes)\n", - (cycles + 4) / 8, blen); - - return ret; -} - static inline int do_one_aead_op(struct aead_request *req, int ret) { if (ret == -EINPROGRESS || ret == -EBUSY) { @@ -454,127 +386,148 @@ out_noxbuf: return; } -static void test_cipher_speed(const char *algo, int enc, unsigned int secs, - struct cipher_speed_template *template, - unsigned int tcount, u8 *keysize) +static void test_hash_sg_init(struct scatterlist *sg) { - unsigned int ret, i, j, iv_len; - const char *key; - char iv[128]; - struct crypto_blkcipher *tfm; - struct blkcipher_desc desc; - const char *e; - u32 *b_size; + int i; - if (enc == ENCRYPT) - e = "encryption"; - else - e = "decryption"; + sg_init_table(sg, TVMEMSIZE); + for (i = 0; i < TVMEMSIZE; i++) { + sg_set_buf(sg + i, tvmem[i], PAGE_SIZE); + memset(tvmem[i], 0xff, PAGE_SIZE); + } +} - tfm = crypto_alloc_blkcipher(algo, 0, CRYPTO_ALG_ASYNC); +static inline int do_one_ahash_op(struct ahash_request *req, int ret) +{ + if (ret == -EINPROGRESS || ret == -EBUSY) { + struct tcrypt_result *tr = req->base.data; - if (IS_ERR(tfm)) { - printk("failed to load transform for %s: %ld\n", algo, - PTR_ERR(tfm)); + wait_for_completion(&tr->completion); + reinit_completion(&tr->completion); + ret = tr->err; + } + return ret; +} + +struct test_mb_ahash_data { + struct scatterlist sg[TVMEMSIZE]; + char result[64]; + struct ahash_request *req; + struct tcrypt_result tresult; + char *xbuf[XBUFSIZE]; +}; + +static void test_mb_ahash_speed(const char *algo, unsigned int sec, + struct hash_speed *speed) +{ + struct test_mb_ahash_data *data; + struct crypto_ahash *tfm; + unsigned long start, end; + unsigned long cycles; + unsigned int i, j, k; + int ret; + + data = kzalloc(sizeof(*data) * 8, GFP_KERNEL); + if (!data) return; + + tfm = crypto_alloc_ahash(algo, 0, 0); + if (IS_ERR(tfm)) { + pr_err("failed to load transform for %s: %ld\n", + algo, PTR_ERR(tfm)); + goto free_data; } - desc.tfm = tfm; - desc.flags = 0; - printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo, - get_driver_name(crypto_blkcipher, tfm), e); + for (i = 0; i < 8; ++i) { + if (testmgr_alloc_buf(data[i].xbuf)) + goto out; - i = 0; - do { + init_completion(&data[i].tresult.completion); - b_size = block_sizes; - do { - struct scatterlist sg[TVMEMSIZE]; + data[i].req = ahash_request_alloc(tfm, GFP_KERNEL); + if (!data[i].req) { + pr_err("alg: hash: Failed to allocate request for %s\n", + algo); + goto out; + } - if ((*keysize + *b_size) > TVMEMSIZE * PAGE_SIZE) { - printk("template (%u) too big for " - "tvmem (%lu)\n", *keysize + *b_size, - TVMEMSIZE * PAGE_SIZE); - goto out; - } + ahash_request_set_callback(data[i].req, 0, + tcrypt_complete, &data[i].tresult); + test_hash_sg_init(data[i].sg); + } - printk("test %u (%d bit key, %d byte blocks): ", i, - *keysize * 8, *b_size); + pr_info("\ntesting speed of multibuffer %s (%s)\n", algo, + get_driver_name(crypto_ahash, tfm)); - memset(tvmem[0], 0xff, PAGE_SIZE); + for (i = 0; speed[i].blen != 0; i++) { + /* For some reason this only tests digests. 
*/ + if (speed[i].blen != speed[i].plen) + continue; - /* set key, plain text and IV */ - key = tvmem[0]; - for (j = 0; j < tcount; j++) { - if (template[j].klen == *keysize) { - key = template[j].key; - break; - } - } + if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) { + pr_err("template (%u) too big for tvmem (%lu)\n", + speed[i].blen, TVMEMSIZE * PAGE_SIZE); + goto out; + } - ret = crypto_blkcipher_setkey(tfm, key, *keysize); - if (ret) { - printk("setkey() failed flags=%x\n", - crypto_blkcipher_get_flags(tfm)); - goto out; - } + if (speed[i].klen) + crypto_ahash_setkey(tfm, tvmem[0], speed[i].klen); - sg_init_table(sg, TVMEMSIZE); - sg_set_buf(sg, tvmem[0] + *keysize, - PAGE_SIZE - *keysize); - for (j = 1; j < TVMEMSIZE; j++) { - sg_set_buf(sg + j, tvmem[j], PAGE_SIZE); - memset (tvmem[j], 0xff, PAGE_SIZE); - } + for (k = 0; k < 8; k++) + ahash_request_set_crypt(data[k].req, data[k].sg, + data[k].result, speed[i].blen); - iv_len = crypto_blkcipher_ivsize(tfm); - if (iv_len) { - memset(&iv, 0xff, iv_len); - crypto_blkcipher_set_iv(tfm, iv, iv_len); - } + pr_info("test%3u " + "(%5u byte blocks,%5u bytes per update,%4u updates): ", + i, speed[i].blen, speed[i].plen, + speed[i].blen / speed[i].plen); - if (secs) - ret = test_cipher_jiffies(&desc, enc, sg, - *b_size, secs); - else - ret = test_cipher_cycles(&desc, enc, sg, - *b_size); + start = get_cycles(); - if (ret) { - printk("%s() failed flags=%x\n", e, desc.flags); - break; + for (k = 0; k < 8; k++) { + ret = crypto_ahash_digest(data[k].req); + if (ret == -EINPROGRESS) { + ret = 0; + continue; } - b_size++; - i++; - } while (*b_size); - keysize++; - } while (*keysize); -out: - crypto_free_blkcipher(tfm); -} + if (ret) + break; -static void test_hash_sg_init(struct scatterlist *sg) -{ - int i; + complete(&data[k].tresult.completion); + data[k].tresult.err = 0; + } - sg_init_table(sg, TVMEMSIZE); - for (i = 0; i < TVMEMSIZE; i++) { - sg_set_buf(sg + i, tvmem[i], PAGE_SIZE); - memset(tvmem[i], 0xff, PAGE_SIZE); - } -} + for (j = 0; j < k; j++) { + struct tcrypt_result *tr = &data[j].tresult; -static inline int do_one_ahash_op(struct ahash_request *req, int ret) -{ - if (ret == -EINPROGRESS || ret == -EBUSY) { - struct tcrypt_result *tr = req->base.data; + wait_for_completion(&tr->completion); + if (tr->err) + ret = tr->err; + } - wait_for_completion(&tr->completion); - reinit_completion(&tr->completion); - ret = tr->err; + end = get_cycles(); + cycles = end - start; + pr_cont("%6lu cycles/operation, %4lu cycles/byte\n", + cycles, cycles / (8 * speed[i].blen)); + + if (ret) { + pr_err("At least one hashing failed ret=%d\n", ret); + break; + } } - return ret; + +out: + for (k = 0; k < 8; ++k) + ahash_request_free(data[k].req); + + for (k = 0; k < 8; ++k) + testmgr_free_buf(data[k].xbuf); + + crypto_free_ahash(tfm); + +free_data: + kfree(data); } static int test_ahash_jiffies_digest(struct ahash_request *req, int blen, @@ -812,7 +765,7 @@ static void test_hash_speed(const char *algo, unsigned int secs, return test_ahash_speed_common(algo, secs, speed, CRYPTO_ALG_ASYNC); } -static inline int do_one_acipher_op(struct ablkcipher_request *req, int ret) +static inline int do_one_acipher_op(struct skcipher_request *req, int ret) { if (ret == -EINPROGRESS || ret == -EBUSY) { struct tcrypt_result *tr = req->base.data; @@ -825,7 +778,7 @@ static inline int do_one_acipher_op(struct ablkcipher_request *req, int ret) return ret; } -static int test_acipher_jiffies(struct ablkcipher_request *req, int enc, +static int test_acipher_jiffies(struct 
skcipher_request *req, int enc, int blen, int secs) { unsigned long start, end; @@ -836,10 +789,10 @@ static int test_acipher_jiffies(struct ablkcipher_request *req, int enc, time_before(jiffies, end); bcount++) { if (enc) ret = do_one_acipher_op(req, - crypto_ablkcipher_encrypt(req)); + crypto_skcipher_encrypt(req)); else ret = do_one_acipher_op(req, - crypto_ablkcipher_decrypt(req)); + crypto_skcipher_decrypt(req)); if (ret) return ret; @@ -850,7 +803,7 @@ static int test_acipher_jiffies(struct ablkcipher_request *req, int enc, return 0; } -static int test_acipher_cycles(struct ablkcipher_request *req, int enc, +static int test_acipher_cycles(struct skcipher_request *req, int enc, int blen) { unsigned long cycles = 0; @@ -861,10 +814,10 @@ static int test_acipher_cycles(struct ablkcipher_request *req, int enc, for (i = 0; i < 4; i++) { if (enc) ret = do_one_acipher_op(req, - crypto_ablkcipher_encrypt(req)); + crypto_skcipher_encrypt(req)); else ret = do_one_acipher_op(req, - crypto_ablkcipher_decrypt(req)); + crypto_skcipher_decrypt(req)); if (ret) goto out; @@ -877,10 +830,10 @@ static int test_acipher_cycles(struct ablkcipher_request *req, int enc, start = get_cycles(); if (enc) ret = do_one_acipher_op(req, - crypto_ablkcipher_encrypt(req)); + crypto_skcipher_encrypt(req)); else ret = do_one_acipher_op(req, - crypto_ablkcipher_decrypt(req)); + crypto_skcipher_decrypt(req)); end = get_cycles(); if (ret) @@ -897,16 +850,16 @@ out: return ret; } -static void test_acipher_speed(const char *algo, int enc, unsigned int secs, - struct cipher_speed_template *template, - unsigned int tcount, u8 *keysize) +static void test_skcipher_speed(const char *algo, int enc, unsigned int secs, + struct cipher_speed_template *template, + unsigned int tcount, u8 *keysize, bool async) { unsigned int ret, i, j, k, iv_len; struct tcrypt_result tresult; const char *key; char iv[128]; - struct ablkcipher_request *req; - struct crypto_ablkcipher *tfm; + struct skcipher_request *req; + struct crypto_skcipher *tfm; const char *e; u32 *b_size; @@ -917,7 +870,7 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, init_completion(&tresult.completion); - tfm = crypto_alloc_ablkcipher(algo, 0, 0); + tfm = crypto_alloc_skcipher(algo, 0, async ? 
0 : CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { pr_err("failed to load transform for %s: %ld\n", algo, @@ -926,17 +879,17 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, } pr_info("\ntesting speed of async %s (%s) %s\n", algo, - get_driver_name(crypto_ablkcipher, tfm), e); + get_driver_name(crypto_skcipher, tfm), e); - req = ablkcipher_request_alloc(tfm, GFP_KERNEL); + req = skcipher_request_alloc(tfm, GFP_KERNEL); if (!req) { pr_err("tcrypt: skcipher: Failed to allocate request for %s\n", algo); goto out; } - ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - tcrypt_complete, &tresult); + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tcrypt_complete, &tresult); i = 0; do { @@ -966,12 +919,12 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, } } - crypto_ablkcipher_clear_flags(tfm, ~0); + crypto_skcipher_clear_flags(tfm, ~0); - ret = crypto_ablkcipher_setkey(tfm, key, *keysize); + ret = crypto_skcipher_setkey(tfm, key, *keysize); if (ret) { pr_err("setkey() failed flags=%x\n", - crypto_ablkcipher_get_flags(tfm)); + crypto_skcipher_get_flags(tfm)); goto out_free_req; } @@ -995,11 +948,11 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, sg_set_buf(sg, tvmem[0] + *keysize, *b_size); } - iv_len = crypto_ablkcipher_ivsize(tfm); + iv_len = crypto_skcipher_ivsize(tfm); if (iv_len) memset(&iv, 0xff, iv_len); - ablkcipher_request_set_crypt(req, sg, sg, *b_size, iv); + skcipher_request_set_crypt(req, sg, sg, *b_size, iv); if (secs) ret = test_acipher_jiffies(req, enc, @@ -1010,7 +963,7 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, if (ret) { pr_err("%s() failed flags=%x\n", e, - crypto_ablkcipher_get_flags(tfm)); + crypto_skcipher_get_flags(tfm)); break; } b_size++; @@ -1020,9 +973,25 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, } while (*keysize); out_free_req: - ablkcipher_request_free(req); + skcipher_request_free(req); out: - crypto_free_ablkcipher(tfm); + crypto_free_skcipher(tfm); +} + +static void test_acipher_speed(const char *algo, int enc, unsigned int secs, + struct cipher_speed_template *template, + unsigned int tcount, u8 *keysize) +{ + return test_skcipher_speed(algo, enc, secs, template, tcount, keysize, + true); +} + +static void test_cipher_speed(const char *algo, int enc, unsigned int secs, + struct cipher_speed_template *template, + unsigned int tcount, u8 *keysize) +{ + return test_skcipher_speed(algo, enc, secs, template, tcount, keysize, + false); } static void test_available(void) @@ -1284,6 +1253,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) ret += tcrypt_test("crct10dif"); break; + case 48: + ret += tcrypt_test("sha3-224"); + break; + + case 49: + ret += tcrypt_test("sha3-256"); + break; + + case 50: + ret += tcrypt_test("sha3-384"); + break; + + case 51: + ret += tcrypt_test("sha3-512"); + break; + case 100: ret += tcrypt_test("hmac(md5)"); break; @@ -1328,6 +1313,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) ret += tcrypt_test("hmac(crc32)"); break; + case 111: + ret += tcrypt_test("hmac(sha3-224)"); + break; + + case 112: + ret += tcrypt_test("hmac(sha3-256)"); + break; + + case 113: + ret += tcrypt_test("hmac(sha3-384)"); + break; + + case 114: + ret += tcrypt_test("hmac(sha3-512)"); + break; + case 150: ret += tcrypt_test("ansi_cprng"); break; @@ -1406,6 +1407,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) 
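
After the conversion, test_cipher_speed() and test_acipher_speed() are thin wrappers that differ only in the async flag handed to test_skcipher_speed(), and that flag reduces to the allocation mask: a zero mask accepts any implementation, while putting CRYPTO_ALG_ASYNC in the mask restricts the lookup to synchronous tfms. A minimal sketch of that mapping (the helper name is ours, not part of the patch):

#include <crypto/skcipher.h>

/*
 * Illustrative helper: a zero mask accepts any implementation for the
 * async case; masking on CRYPTO_ALG_ASYNC yields a synchronous tfm,
 * exactly as test_skcipher_speed() does above.
 */
static struct crypto_skcipher *speed_alloc_skcipher(const char *algo,
						    bool async)
{
	return crypto_alloc_skcipher(algo, 0, async ? 0 : CRYPTO_ALG_ASYNC);
}
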
speed_template_32_48_64); test_cipher_speed("xts(aes)", DECRYPT, sec, NULL, 0, speed_template_32_48_64); + test_cipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_cipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0, + speed_template_16_24_32); test_cipher_speed("ctr(aes)", ENCRYPT, sec, NULL, 0, speed_template_16_24_32); test_cipher_speed("ctr(aes)", DECRYPT, sec, NULL, 0, @@ -1691,6 +1696,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) test_hash_speed("poly1305", sec, poly1305_speed_template); if (mode > 300 && mode < 400) break; + case 322: + test_hash_speed("sha3-224", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + + case 323: + test_hash_speed("sha3-256", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + + case 324: + test_hash_speed("sha3-384", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + + case 325: + test_hash_speed("sha3-512", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + case 399: break; @@ -1770,6 +1791,35 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) test_ahash_speed("rmd320", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; + case 418: + test_ahash_speed("sha3-224", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 419: + test_ahash_speed("sha3-256", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 420: + test_ahash_speed("sha3-384", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + + case 421: + test_ahash_speed("sha3-512", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 422: + test_mb_ahash_speed("sha1", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 423: + test_mb_ahash_speed("sha256", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 424: + test_mb_ahash_speed("sha512", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + case 499: break; @@ -1790,6 +1840,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) speed_template_32_48_64); test_acipher_speed("xts(aes)", DECRYPT, sec, NULL, 0, speed_template_32_48_64); + test_acipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_acipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0, + speed_template_16_24_32); test_acipher_speed("ctr(aes)", ENCRYPT, sec, NULL, 0, speed_template_16_24_32); test_acipher_speed("ctr(aes)", DECRYPT, sec, NULL, 0, diff --git a/crypto/testmgr.c b/crypto/testmgr.c index c727fb0cb021..5c9d5a5e7b65 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" @@ -120,6 +121,11 @@ struct akcipher_test_suite { unsigned int count; }; +struct kpp_test_suite { + struct kpp_testvec *vecs; + unsigned int count; +}; + struct alg_test_desc { const char *alg; int (*test)(const struct alg_test_desc *desc, const char *driver, @@ -134,6 +140,7 @@ struct alg_test_desc { struct cprng_test_suite cprng; struct drbg_test_suite drbg; struct akcipher_test_suite akcipher; + struct kpp_test_suite kpp; } suite; }; @@ -1777,8 +1784,135 @@ static int alg_test_drbg(const struct alg_test_desc *desc, const char *driver, } -static int do_test_rsa(struct crypto_akcipher *tfm, - struct akcipher_testvec *vecs) +static int do_test_kpp(struct crypto_kpp *tfm, struct kpp_testvec *vec, + const 
char *alg) +{ + struct kpp_request *req; + void *input_buf = NULL; + void *output_buf = NULL; + struct tcrypt_result result; + unsigned int out_len_max; + int err = -ENOMEM; + struct scatterlist src, dst; + + req = kpp_request_alloc(tfm, GFP_KERNEL); + if (!req) + return err; + + init_completion(&result.completion); + + err = crypto_kpp_set_secret(tfm, vec->secret, vec->secret_size); + if (err < 0) + goto free_req; + + out_len_max = crypto_kpp_maxsize(tfm); + output_buf = kzalloc(out_len_max, GFP_KERNEL); + if (!output_buf) { + err = -ENOMEM; + goto free_req; + } + + /* Use the appropriate parameter as base */ + kpp_request_set_input(req, NULL, 0); + sg_init_one(&dst, output_buf, out_len_max); + kpp_request_set_output(req, &dst, out_len_max); + kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tcrypt_complete, &result); + + /* Compute public key */ + err = wait_async_op(&result, crypto_kpp_generate_public_key(req)); + if (err) { + pr_err("alg: %s: generate public key test failed. err %d\n", + alg, err); + goto free_output; + } + /* Verify calculated public key */ + if (memcmp(vec->expected_a_public, sg_virt(req->dst), + vec->expected_a_public_size)) { + pr_err("alg: %s: generate public key test failed. Invalid output\n", + alg); + err = -EINVAL; + goto free_output; + } + + /* Calculate the shared secret by using the counterpart's (b) public key. */ + input_buf = kzalloc(vec->b_public_size, GFP_KERNEL); + if (!input_buf) { + err = -ENOMEM; + goto free_output; + } + + memcpy(input_buf, vec->b_public, vec->b_public_size); + sg_init_one(&src, input_buf, vec->b_public_size); + sg_init_one(&dst, output_buf, out_len_max); + kpp_request_set_input(req, &src, vec->b_public_size); + kpp_request_set_output(req, &dst, out_len_max); + kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tcrypt_complete, &result); + err = wait_async_op(&result, crypto_kpp_compute_shared_secret(req)); + if (err) { + pr_err("alg: %s: compute shared secret test failed. err %d\n", + alg, err); + goto free_all; + } + /* + * Verify the shared secret, from which the user will derive + * a secret key by running whatever hash they have chosen. + */ + if (memcmp(vec->expected_ss, sg_virt(req->dst), + vec->expected_ss_size)) { + pr_err("alg: %s: compute shared secret test failed. 
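
Stripped of verification and error reporting, do_test_kpp() walks the new crypto_kpp API in a fixed order: set the secret, generate our public key with a NULL input (the generator is the base), then feed the peer's public key back in to derive the shared secret. A self-contained sketch of that round trip, assuming the tcrypt_result/wait_async_op helpers shown earlier are in scope and hard-coding "dh" purely for illustration:

#include <crypto/kpp.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

/* Sketch: one Diffie-Hellman round trip through the kpp API. */
static int kpp_roundtrip(const void *secret, unsigned int secret_size,
			 void *peer_pub, unsigned int peer_size)
{
	struct crypto_kpp *tfm = crypto_alloc_kpp("dh", 0, 0);
	struct kpp_request *req;
	struct tcrypt_result res;
	struct scatterlist src, dst;
	void *out;
	int err = -ENOMEM;

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	req = kpp_request_alloc(tfm, GFP_KERNEL);
	if (!req)
		goto free_tfm;
	out = kzalloc(crypto_kpp_maxsize(tfm), GFP_KERNEL);
	if (!out)
		goto free_req;

	init_completion(&res.completion);
	err = crypto_kpp_set_secret(tfm, secret, secret_size);
	if (err)
		goto free_out;

	/* base = generator: NULL input yields our own public key */
	kpp_request_set_input(req, NULL, 0);
	sg_init_one(&dst, out, crypto_kpp_maxsize(tfm));
	kpp_request_set_output(req, &dst, crypto_kpp_maxsize(tfm));
	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				 tcrypt_complete, &res);
	err = wait_async_op(&res, crypto_kpp_generate_public_key(req));
	if (err)
		goto free_out;

	/* base = peer's public key: the output is the shared secret */
	sg_init_one(&src, peer_pub, peer_size);
	kpp_request_set_input(req, &src, peer_size);
	err = wait_async_op(&res, crypto_kpp_compute_shared_secret(req));

free_out:
	kfree(out);
free_req:
	kpp_request_free(req);
free_tfm:
	crypto_free_kpp(tfm);
	return err;
}
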
Invalid output\n", + alg); + err = -EINVAL; + } + +free_all: + kfree(input_buf); +free_output: + kfree(output_buf); +free_req: + kpp_request_free(req); + return err; +} + +static int test_kpp(struct crypto_kpp *tfm, const char *alg, + struct kpp_testvec *vecs, unsigned int tcount) +{ + int ret, i; + + for (i = 0; i < tcount; i++) { + ret = do_test_kpp(tfm, vecs++, alg); + if (ret) { + pr_err("alg: %s: test failed on vector %d, err=%d\n", + alg, i + 1, ret); + return ret; + } + } + return 0; +} + +static int alg_test_kpp(const struct alg_test_desc *desc, const char *driver, + u32 type, u32 mask) +{ + struct crypto_kpp *tfm; + int err = 0; + + tfm = crypto_alloc_kpp(driver, type | CRYPTO_ALG_INTERNAL, mask); + if (IS_ERR(tfm)) { + pr_err("alg: kpp: Failed to load tfm for %s: %ld\n", + driver, PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + if (desc->suite.kpp.vecs) + err = test_kpp(tfm, desc->alg, desc->suite.kpp.vecs, + desc->suite.kpp.count); + + crypto_free_kpp(tfm); + return err; +} + +static int test_akcipher_one(struct crypto_akcipher *tfm, + struct akcipher_testvec *vecs) { char *xbuf[XBUFSIZE]; struct akcipher_request *req; @@ -1807,6 +1941,7 @@ static int do_test_rsa(struct crypto_akcipher *tfm, if (err) goto free_req; + err = -ENOMEM; out_len_max = crypto_akcipher_maxsize(tfm); outbuf_enc = kzalloc(out_len_max, GFP_KERNEL); if (!outbuf_enc) @@ -1829,17 +1964,18 @@ static int do_test_rsa(struct crypto_akcipher *tfm, /* Run RSA encrypt - c = m^e mod n;*/ err = wait_async_op(&result, crypto_akcipher_encrypt(req)); if (err) { - pr_err("alg: rsa: encrypt test failed. err %d\n", err); + pr_err("alg: akcipher: encrypt test failed. err %d\n", err); goto free_all; } if (req->dst_len != vecs->c_size) { - pr_err("alg: rsa: encrypt test failed. Invalid output len\n"); + pr_err("alg: akcipher: encrypt test failed. Invalid output len\n"); err = -EINVAL; goto free_all; } /* verify that encrypted message is equal to expected */ if (memcmp(vecs->c, outbuf_enc, vecs->c_size)) { - pr_err("alg: rsa: encrypt test failed. Invalid output\n"); + pr_err("alg: akcipher: encrypt test failed. Invalid output\n"); + hexdump(outbuf_enc, vecs->c_size); err = -EINVAL; goto free_all; } @@ -1867,18 +2003,22 @@ static int do_test_rsa(struct crypto_akcipher *tfm, /* Run RSA decrypt - m = c^d mod n;*/ err = wait_async_op(&result, crypto_akcipher_decrypt(req)); if (err) { - pr_err("alg: rsa: decrypt test failed. err %d\n", err); + pr_err("alg: akcipher: decrypt test failed. err %d\n", err); goto free_all; } out_len = req->dst_len; - if (out_len != vecs->m_size) { - pr_err("alg: rsa: decrypt test failed. Invalid output len\n"); + if (out_len < vecs->m_size) { + pr_err("alg: akcipher: decrypt test failed. " + "Invalid output len %u\n", out_len); err = -EINVAL; goto free_all; } /* verify that decrypted message is equal to the original msg */ - if (memcmp(vecs->m, outbuf_dec, vecs->m_size)) { - pr_err("alg: rsa: decrypt test failed. Invalid output\n"); + if (memchr_inv(outbuf_dec, 0, out_len - vecs->m_size) || + memcmp(vecs->m, outbuf_dec + out_len - vecs->m_size, + vecs->m_size)) { + pr_err("alg: akcipher: decrypt test failed. 
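
The decrypt comparison above is the one behavioural change in this hunk: rather than requiring exactly m_size output bytes, the test now accepts implementations that return the RSA primitive's full-width result with zero left-padding. The accepted shape, written out as a standalone predicate (our naming, not part of the patch):

#include <linux/string.h>
#include <linux/types.h>

/*
 * Sketch of the relaxed comparison: out may carry out_len - m_size
 * leading zero bytes ahead of the message proper.
 */
static bool akcipher_output_matches(const u8 *out, unsigned int out_len,
				    const u8 *m, unsigned int m_size)
{
	if (out_len < m_size)
		return false;
	return !memchr_inv(out, 0, out_len - m_size) &&
	       !memcmp(m, out + out_len - m_size, m_size);
}
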
Invalid output\n"); + hexdump(outbuf_dec, out_len); err = -EINVAL; } free_all: @@ -1891,28 +2031,22 @@ free_xbuf: return err; } -static int test_rsa(struct crypto_akcipher *tfm, struct akcipher_testvec *vecs, - unsigned int tcount) +static int test_akcipher(struct crypto_akcipher *tfm, const char *alg, + struct akcipher_testvec *vecs, unsigned int tcount) { + const char *algo = + crypto_tfm_alg_driver_name(crypto_akcipher_tfm(tfm)); int ret, i; for (i = 0; i < tcount; i++) { - ret = do_test_rsa(tfm, vecs++); - if (ret) { - pr_err("alg: rsa: test failed on vector %d, err=%d\n", - i + 1, ret); - return ret; - } - } - return 0; -} - -static int test_akcipher(struct crypto_akcipher *tfm, const char *alg, - struct akcipher_testvec *vecs, unsigned int tcount) -{ - if (strncmp(alg, "rsa", 3) == 0) - return test_rsa(tfm, vecs, tcount); + ret = test_akcipher_one(tfm, vecs++); + if (!ret) + continue; + pr_err("alg: akcipher: test %d failed for %s, err=%d\n", + i + 1, algo, ret); + return ret; + } return 0; } @@ -2728,6 +2862,16 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "dh", + .test = alg_test_kpp, + .fips_allowed = 1, + .suite = { + .kpp = { + .vecs = dh_tv_template, + .count = DH_TEST_VECTORS + } + } }, { .alg = "digest_null", .test = alg_test_null, @@ -3156,6 +3300,16 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "ecdh", + .test = alg_test_kpp, + .fips_allowed = 1, + .suite = { + .kpp = { + .vecs = ecdh_tv_template, + .count = ECDH_TEST_VECTORS + } + } }, { .alg = "gcm(aes)", .test = alg_test_aead, @@ -3248,6 +3402,46 @@ static const struct alg_test_desc alg_test_descs[] = { .count = HMAC_SHA256_TEST_VECTORS } } + }, { + .alg = "hmac(sha3-224)", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = hmac_sha3_224_tv_template, + .count = HMAC_SHA3_224_TEST_VECTORS + } + } + }, { + .alg = "hmac(sha3-256)", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = hmac_sha3_256_tv_template, + .count = HMAC_SHA3_256_TEST_VECTORS + } + } + }, { + .alg = "hmac(sha3-384)", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = hmac_sha3_384_tv_template, + .count = HMAC_SHA3_384_TEST_VECTORS + } + } + }, { + .alg = "hmac(sha3-512)", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = hmac_sha3_512_tv_template, + .count = HMAC_SHA3_512_TEST_VECTORS + } + } }, { .alg = "hmac(sha384)", .test = alg_test_hash, @@ -3658,6 +3852,46 @@ static const struct alg_test_desc alg_test_descs[] = { .count = SHA256_TEST_VECTORS } } + }, { + .alg = "sha3-224", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = sha3_224_tv_template, + .count = SHA3_224_TEST_VECTORS + } + } + }, { + .alg = "sha3-256", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = sha3_256_tv_template, + .count = SHA3_256_TEST_VECTORS + } + } + }, { + .alg = "sha3-384", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = sha3_384_tv_template, + .count = SHA3_384_TEST_VECTORS + } + } + }, { + .alg = "sha3-512", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = sha3_512_tv_template, + .count = SHA3_512_TEST_VECTORS + } + } }, { .alg = "sha384", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 487ec880e889..acb6bbff781a 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -133,6 +133,17 @@ struct akcipher_testvec { bool public_key_vec; 
}; +struct kpp_testvec { + unsigned char *secret; + unsigned char *b_public; + unsigned char *expected_a_public; + unsigned char *expected_ss; + unsigned short secret_size; + unsigned short b_public_size; + unsigned short expected_a_public_size; + unsigned short expected_ss_size; +}; + static char zeroed_string[48]; /* @@ -141,7 +152,7 @@ static char zeroed_string[48]; #ifdef CONFIG_CRYPTO_FIPS #define RSA_TEST_VECTORS 2 #else -#define RSA_TEST_VECTORS 4 +#define RSA_TEST_VECTORS 5 #endif static struct akcipher_testvec rsa_tv_template[] = { { @@ -327,6 +338,516 @@ static struct akcipher_testvec rsa_tv_template[] = { .m_size = 8, .c_size = 256, .public_key_vec = true, + }, { + .key = + "\x30\x82\x09\x29" /* sequence of 2345 bytes */ + "\x02\x01\x00" /* version integer of 1 byte */ + "\x02\x82\x02\x01" /* modulus - integer of 513 bytes */ + "\x00\xC3\x8B\x55\x7B\x73\x4D\xFF\xE9\x9B\xC6\xDC\x67\x3C\xB4\x8E" + "\xA0\x86\xED\xF2\xB9\x50\x5C\x54\x5C\xBA\xE4\xA1\xB2\xA7\xAE\x2F" + "\x1B\x7D\xF1\xFB\xAC\x79\xC5\xDF\x1A\x00\xC9\xB2\xC1\x61\x25\x33" + "\xE6\x9C\xE9\xCF\xD6\x27\xC4\x4E\x44\x30\x44\x5E\x08\xA1\x87\x52" + "\xCC\x6B\x97\x70\x8C\xBC\xA5\x06\x31\x0C\xD4\x2F\xD5\x7D\x26\x24" + "\xA2\xE2\xAC\x78\xF4\x53\x14\xCE\xF7\x19\x2E\xD7\xF7\xE6\x0C\xB9" + "\x56\x7F\x0B\xF1\xB1\xE2\x43\x70\xBD\x86\x1D\xA1\xCC\x2B\x19\x08" + "\x76\xEF\x91\xAC\xBF\x20\x24\x0D\x38\xC0\x89\xB8\x9A\x70\xB3\x64" + "\xD9\x8F\x80\x41\x10\x5B\x9F\xB1\xCB\x76\x43\x00\x21\x25\x36\xD4" + "\x19\xFC\x55\x95\x10\xE4\x26\x74\x98\x2C\xD9\xBD\x0B\x2B\x04\xC2" + "\xAC\x82\x38\xB4\xDD\x4C\x04\x7E\x51\x36\x40\x1E\x0B\xC4\x7C\x25" + "\xDD\x4B\xB2\xE7\x20\x0A\x57\xF9\xB4\x94\xC3\x08\x33\x22\x6F\x8B" + "\x48\xDB\x03\x68\x5A\x5B\xBA\xAE\xF3\xAD\xCF\xC3\x6D\xBA\xF1\x28" + "\x67\x7E\x6C\x79\x07\xDE\xFC\xED\xE7\x96\xE3\x6C\xE0\x2C\x87\xF8" + "\x02\x01\x28\x38\x43\x21\x53\x84\x69\x75\x78\x15\x7E\xEE\xD2\x1B" + "\xB9\x23\x40\xA8\x86\x1E\x38\x83\xB2\x73\x1D\x53\xFB\x9E\x2A\x8A" + "\xB2\x75\x35\x01\xC3\xC3\xC4\x94\xE8\x84\x86\x64\x81\xF4\x42\xAA" + "\x3C\x0E\xD6\x4F\xBC\x0A\x09\x2D\xE7\x1B\xD4\x10\xA8\x54\xEA\x89" + "\x84\x8A\xCB\xF7\x5A\x3C\xCA\x76\x08\x29\x62\xB4\x6A\x22\xDF\x14" + "\x95\x71\xFD\xB6\x86\x39\xB8\x8B\xF8\x91\x7F\x38\xAA\x14\xCD\xE5" + "\xF5\x1D\xC2\x6D\x53\x69\x52\x84\x7F\xA3\x1A\x5E\x26\x04\x83\x06" + "\x73\x52\x56\xCF\x76\x26\xC9\xDD\x75\xD7\xFC\xF4\x69\xD8\x7B\x55" + "\xB7\x68\x13\x53\xB9\xE7\x89\xC3\xE8\xD6\x6E\xA7\x6D\xEA\x81\xFD" + "\xC4\xB7\x05\x5A\xB7\x41\x0A\x23\x8E\x03\x8A\x1C\xAE\xD3\x1E\xCE" + "\xE3\x5E\xFC\x19\x4A\xEE\x61\x9B\x8E\xE5\xE5\xDD\x85\xF9\x41\xEC" + "\x14\x53\x92\xF7\xDD\x06\x85\x02\x91\xE3\xEB\x6C\x43\x03\xB1\x36" + "\x7B\x89\x5A\xA8\xEB\xFC\xD5\xA8\x35\xDC\x81\xD9\x5C\xBD\xCA\xDC" + "\x9B\x98\x0B\x06\x5D\x0C\x5B\xEE\xF3\xD5\xCC\x57\xC9\x71\x2F\x90" + "\x3B\x3C\xF0\x8E\x4E\x35\x48\xAE\x63\x74\xA9\xFC\x72\x75\x8E\x34" + "\xA8\xF2\x1F\xEA\xDF\x3A\x37\x2D\xE5\x39\x39\xF8\x57\x58\x3C\x04" + "\xFE\x87\x06\x98\xBC\x7B\xD3\x21\x36\x60\x25\x54\xA7\x3D\xFA\x91" + "\xCC\xA8\x0B\x92\x8E\xB4\xF7\x06\xFF\x1E\x95\xCB\x07\x76\x97\x3B" + "\x9D" + "\x02\x03\x01\x00\x01" /* public key integer of 3 bytes */ + "\x02\x82\x02\x00" /* private key integer of 512 bytes */ + "\x74\xA9\xE0\x6A\x32\xB4\xCA\x85\xD9\x86\x9F\x60\x88\x7B\x40\xCC" + "\xCD\x33\x91\xA8\xB6\x25\x1F\xBF\xE3\x51\x1C\x97\xB6\x2A\xD9\xB8" + "\x11\x40\x19\xE3\x21\x13\xC8\xB3\x7E\xDC\xD7\x65\x40\x4C\x2D\xD6" + "\xDC\xAF\x32\x6C\x96\x75\x2C\x2C\xCA\x8F\x3F\x7A\xEE\xC4\x09\xC6" + "\x24\x3A\xC9\xCF\x6D\x8D\x17\x50\x94\x52\xD3\xE7\x0F\x2F\x7E\x94" + 
"\x1F\xA0\xBE\xD9\x25\xE8\x38\x42\x7C\x27\xD2\x79\xF8\x2A\x87\x38" + "\xEF\xBB\x74\x8B\xA8\x6E\x8C\x08\xC6\xC7\x4F\x0C\xBC\x79\xC6\xEF" + "\x0E\xA7\x5E\xE4\xF8\x8C\x09\xC7\x5E\x37\xCC\x87\x77\xCD\xCF\xD1" + "\x6D\x28\x1B\xA9\x62\xC0\xB8\x16\xA7\x8B\xF9\xBB\xCC\xB4\x15\x7F" + "\x1B\x69\x03\xF2\x7B\xEB\xE5\x8C\x14\xD6\x23\x4F\x52\x6F\x18\xA6" + "\x4B\x5B\x01\xAD\x35\xF9\x48\x53\xB3\x86\x35\x66\xD7\xE7\x29\xC0" + "\x09\xB5\xC6\xE6\xFA\xC4\xDA\x19\xBE\xD7\x4D\x41\x14\xBE\x6F\xDF" + "\x1B\xAB\xC0\xCA\x88\x07\xAC\xF1\x7D\x35\x83\x67\x28\x2D\x50\xE9" + "\xCE\x27\x71\x5E\x1C\xCF\xD2\x30\x65\x79\x72\x2F\x9C\xE1\xD2\x39" + "\x7F\xEF\x3B\x01\xF2\x14\x1D\xDF\xBD\x51\xD3\xA1\x53\x62\xCF\x5F" + "\x79\x84\xCE\x06\x96\x69\x29\x49\x82\x1C\x71\x4A\xA1\x66\xC8\x2F" + "\xFD\x7B\x96\x7B\xFC\xC4\x26\x58\xC4\xFC\x7C\xAF\xB5\xE8\x95\x83" + "\x87\xCB\x46\xDE\x97\xA7\xB3\xA2\x54\x5B\xD7\xAF\xAB\xEB\xC8\xF3" + "\x55\x9D\x48\x2B\x30\x9C\xDC\x26\x4B\xC2\x89\x45\x13\xB2\x01\x9A" + "\xA4\x65\xC3\xEC\x24\x2D\x26\x97\xEB\x80\x8A\x9D\x03\xBC\x59\x66" + "\x9E\xE2\xBB\xBB\x63\x19\x64\x93\x11\x7B\x25\x65\x30\xCD\x5B\x4B" + "\x2C\xFF\xDC\x2D\x30\x87\x1F\x3C\x88\x07\xD0\xFC\x48\xCC\x05\x8A" + "\xA2\xC8\x39\x3E\xD5\x51\xBC\x0A\xBE\x6D\xA8\xA0\xF6\x88\x06\x79" + "\x13\xFF\x1B\x45\xDA\x54\xC9\x24\x25\x8A\x75\x0A\x26\xD1\x69\x81" + "\x14\x14\xD1\x79\x7D\x8E\x76\xF2\xE0\xEB\xDD\x0F\xDE\xC2\xEC\x80" + "\xD7\xDC\x16\x99\x92\xBE\xCB\x40\x0C\xCE\x7C\x3B\x46\xA2\x5B\x5D" + "\x0C\x45\xEB\xE1\x00\xDE\x72\x50\xB1\xA6\x0B\x76\xC5\x8D\xFC\x82" + "\x38\x6D\x99\x14\x1D\x1A\x4A\xD3\x7C\x53\xB8\x12\x46\xA2\x30\x38" + "\x82\xF4\x96\x6E\x8C\xCE\x47\x0D\xAF\x0A\x3B\x45\xB7\x43\x95\x43" + "\x9E\x02\x2C\x44\x07\x6D\x1F\x3C\x66\x89\x09\xB6\x1F\x06\x30\xCC" + "\xAD\xCE\x7D\x9A\xDE\x3E\xFB\x6C\xE4\x58\x43\xD2\x4F\xA5\x9E\x5E" + "\xA7\x7B\xAE\x3A\xF6\x7E\xD9\xDB\xD3\xF5\xC5\x41\xAF\xE6\x9C\x91" + "\x02\x82\x01\x01" /* prime1 - integer of 257 bytes */ + "\x00\xE0\xA6\x6C\xF0\xA2\xF8\x81\x85\x36\x43\xD0\x13\x0B\x33\x8B" + "\x8F\x78\x3D\xAC\xC7\x5E\x46\x6A\x7F\x05\xAE\x3E\x26\x0A\xA6\xD0" + "\x51\xF3\xC8\x61\xF5\x77\x22\x48\x10\x87\x4C\xD5\xA4\xD5\xAE\x2D" + "\x4E\x7A\xFE\x1C\x31\xE7\x6B\xFF\xA4\x69\x20\xF9\x2A\x0B\x99\xBE" + "\x7C\x32\x68\xAD\xB0\xC6\x94\x81\x41\x75\xDC\x06\x78\x0A\xB4\xCF" + "\xCD\x1B\x2D\x31\xE4\x7B\xEA\xA8\x35\x99\x75\x57\xC6\x0E\xF6\x78" + "\x4F\xA0\x92\x4A\x00\x1B\xE7\x96\xF2\x5B\xFD\x2C\x0A\x0A\x13\x81" + "\xAF\xCB\x59\x87\x31\xD9\x83\x65\xF2\x22\x48\xD0\x03\x67\x39\xF6" + "\xFF\xA8\x36\x07\x3A\x68\xE3\x7B\xA9\x64\xFD\x9C\xF7\xB1\x3D\xBF" + "\x26\x5C\xCC\x7A\xFC\xA2\x8F\x51\xD1\xE1\xE2\x3C\xEC\x06\x75\x7C" + "\x34\xF9\xA9\x33\x70\x11\xAD\x5A\xDC\x5F\xCF\x50\xF6\x23\x2F\x39" + "\xAC\x92\x48\x53\x4D\x01\x96\x3C\xD8\xDC\x1F\x23\x23\x78\x80\x34" + "\x54\x14\x76\x8B\xB6\xBB\xFB\x88\x78\x31\x59\x28\xD2\xB1\x75\x17" + "\x88\x04\x4A\x78\x62\x18\x2E\xF5\xFB\x9B\xEF\x15\xD8\x16\x47\xC6" + "\x42\xB1\x02\xDA\x9E\xE3\x84\x90\xB4\x2D\xC3\xCE\x13\xC9\x12\x7D" + "\x3E\xCD\x39\x39\xC9\xAD\xA1\x1A\xE6\xD5\xAD\x5A\x09\x4D\x1B\x0C" + "\xAB" + "\x02\x82\x01\x01" /* prime 2 - integer of 257 bytes */ + "\x00\xDE\xD5\x1B\xF6\xCD\x83\xB1\xC6\x47\x7E\xB9\xC0\x6B\xA9\xB8" + "\x02\xF3\xAE\x40\x5D\xFC\xD3\xE5\x4E\xF1\xE3\x39\x04\x52\x84\x89" + "\x40\x37\xBB\xC2\xCD\x7F\x71\x77\x17\xDF\x6A\x4C\x31\x24\x7F\xB9" + "\x7E\x7F\xC8\x43\x4A\x3C\xEB\x8D\x1B\x7F\x21\x51\x67\x45\x8F\xA0" + "\x36\x29\x3A\x18\x45\xA5\x32\xEC\x74\x88\x3C\x98\x5D\x67\x3B\xD7" + "\x51\x1F\xE9\xAE\x09\x01\xDE\xDE\x7C\xFB\x60\xD1\xA5\x6C\xE9\x6A" + 
"\x93\x04\x02\x3A\xBB\x67\x02\xB9\xFD\x23\xF0\x02\x2B\x49\x85\xC9" + "\x5B\xE7\x4B\xDF\xA3\xF4\xEE\x59\x4C\x45\xEF\x8B\xC1\x6B\xDE\xDE" + "\xBC\x1A\xFC\xD2\x76\x3F\x33\x74\xA9\x8E\xA3\x7E\x0C\xC6\xCE\x70" + "\xA1\x5B\xA6\x77\xEA\x76\xEB\x18\xCE\xB9\xD7\x78\x8D\xAE\x06\xBB" + "\xD3\x1F\x16\x0D\x05\xAB\x4F\xC6\x52\xC8\x6B\x36\x51\x7D\x1D\x27" + "\xAF\x88\x9A\x6F\xCC\x25\x2E\x74\x06\x72\xCE\x9E\xDB\xE0\x9D\x30" + "\xEF\x55\xA5\x58\x21\xA7\x42\x12\x2C\x2C\x23\x87\xC1\x0F\xE8\x51" + "\xDA\x53\xDA\xFC\x05\x36\xDF\x08\x0E\x08\x36\xBE\x5C\x86\x9E\xCA" + "\x68\x90\x33\x12\x0B\x14\x82\xAB\x90\x1A\xD4\x49\x32\x9C\xBD\xAA" + "\xAB\x4E\x38\xF1\xEE\xED\x3D\x3F\xE8\xBD\x48\x56\xA6\x64\xEE\xC8" + "\xD7" + "\x02\x82\x01\x01" /* exponent 1 - integer of 257 bytes */ + "\x00\x96\x5E\x6F\x8F\x06\xD6\xE6\x03\x1F\x96\x76\x81\x38\xBF\x30" + "\xCC\x40\x84\xAF\xD0\xE7\x06\xA5\x24\x0E\xCE\x59\xA5\x26\xFE\x0F" + "\x74\xBB\x83\xC6\x26\x02\xAF\x3C\xA3\x6B\x9C\xFF\x68\x0C\xEB\x40" + "\x42\x46\xCB\x2E\x5E\x2C\xF4\x3A\x32\x77\x77\xED\xAF\xBA\x02\x17" + "\xE1\x93\xF0\x43\x4A\x8F\x31\x39\xEF\x72\x0F\x6B\x79\x10\x59\x84" + "\xBA\x5A\x55\x7F\x0E\xDB\xEE\xEE\xD6\xA9\xB8\x44\x9F\x3A\xC6\xB9" + "\x33\x3B\x5C\x90\x11\xD0\x9B\xCC\x8A\xBF\x0E\x10\x5B\x4B\xF1\x50" + "\x9E\x35\xB3\xE0\x6D\x7A\x95\x9C\x38\x5D\xC0\x75\x13\xC2\x15\xA7" + "\x81\xEA\xBA\xF7\x4D\x9E\x85\x9D\xF1\x7D\xBA\xD0\x45\x6F\x2A\xD0" + "\x76\xC2\x28\xD0\xAD\xA7\xB5\xDC\xE3\x6A\x99\xFF\x83\x50\xB3\x75" + "\x07\x14\x91\xAF\xEF\x74\xB5\x9F\x9A\xE0\xBA\xA9\x0B\x87\xF3\x85" + "\x5C\x40\xB2\x0E\xA7\xFD\xC6\xED\x45\x8E\xD9\x7C\xB0\xB2\x68\xC6" + "\x1D\xFD\x70\x78\x06\x41\x7F\x95\x12\x36\x9D\xE2\x58\x5D\x15\xEE" + "\x41\x49\xF5\xFA\xEC\x56\x19\xA0\xE6\xE0\xB2\x40\xE1\xD9\xD0\x03" + "\x22\x02\xCF\xD1\x3C\x07\x38\x65\x8F\x65\x0E\xAA\x32\xCE\x25\x05" + "\x16\x73\x51\xB9\x9F\x88\x0B\xCD\x30\xF3\x97\xCC\x2B\x6B\xA4\x0E" + "\x6F" + "\x02\x82\x01\x00" /* exponent 2 - integer of 256 bytes */ + "\x2A\x5F\x3F\xB8\x08\x90\x58\x47\xA9\xE4\xB1\x11\xA3\xE7\x5B\xF4" + "\x43\xBE\x08\xC3\x56\x86\x3C\x7E\x6C\x84\x96\x9C\xF9\xCB\xF6\x05" + "\x5E\x13\xB8\x11\x37\x80\xAD\xF2\xBE\x2B\x0A\x5D\xF5\xE0\xCB\xB7" + "\x00\x39\x66\x82\x41\x5F\x51\x2F\xBF\x56\xE8\x91\xC8\xAA\x6C\xFE" + "\x9F\x8C\x4A\x7D\x43\xD2\x91\x1F\xFF\x9F\xF6\x21\x1C\xB6\x46\x55" + "\x48\xCA\x38\xAB\xC1\xCD\x4D\x65\x5A\xAF\xA8\x6D\xDA\x6D\xF0\x34" + "\x10\x79\x14\x0D\xFA\xA2\x8C\x17\x54\xB4\x18\xD5\x7E\x5F\x90\x50" + "\x87\x84\xE7\xFB\xD7\x61\x53\x5D\xAB\x96\xC7\x6E\x7A\x42\xA0\xFC" + "\x07\xED\xB7\x5F\x80\xD9\x19\xFF\xFB\xFD\x9E\xC4\x73\x31\x62\x3D" + "\x6C\x9E\x15\x03\x62\xA5\x85\xCC\x19\x8E\x9D\x7F\xE3\x6D\xA8\x5D" + "\x96\xF5\xAC\x78\x3D\x81\x27\xE7\x29\xF1\x29\x1D\x09\xBB\x77\x86" + "\x6B\x65\x62\x88\xE1\x31\x1A\x22\xF7\xC5\xCE\x73\x65\x1C\xBE\xE7" + "\x63\xD3\xD3\x14\x63\x27\xAF\x28\xF3\x23\xB6\x76\xC1\xBD\x9D\x82" + "\xF4\x9B\x19\x7D\x2C\x57\xF0\xC2\x2A\x51\xAE\x95\x0D\x8C\x38\x54" + "\xF5\xC6\xA0\x51\xB7\x0E\xB9\xEC\xE7\x0D\x22\xF6\x1A\xD3\xFE\x16" + "\x21\x03\xB7\x0D\x85\xD3\x35\xC9\xDD\xE4\x59\x85\xBE\x7F\xA1\x75" + "\x02\x82\x01\x01" /* coefficient - integer of 257 bytes */ + "\x00\xB9\x48\xD2\x54\x2F\x19\x54\x64\xAE\x62\x80\x61\x89\x80\xB4" + "\x48\x0B\x8D\x7E\x1B\x0F\x50\x08\x82\x3F\xED\x75\x84\xB7\x13\xE4" + "\xF8\x8D\xA8\xBB\x54\x21\x4C\x5A\x54\x07\x16\x4B\xB4\xA4\x9E\x30" + "\xBF\x7A\x30\x1B\x39\x60\xA3\x21\x53\xFB\xB0\xDC\x0F\x7C\x2C\xFB" + "\xAA\x95\x7D\x51\x39\x28\x33\x1F\x25\x31\x53\xF5\xD2\x64\x2B\xF2" + "\x1E\xB3\xC0\x6A\x0B\xC9\xA4\x42\x64\x5C\xFB\x15\xA3\xE8\x4C\x3A" + 
"\x9C\x3C\xBE\xA3\x39\x83\x23\xE3\x6D\x18\xCC\xC2\xDC\x63\x8D\xBA" + "\x98\xE0\xE0\x31\x4A\x2B\x37\x9C\x4D\x6B\xF3\x9F\x51\xE4\x43\x5C" + "\x83\x5F\xBF\x5C\xFE\x92\x45\x01\xAF\xF5\xC2\xF4\xB7\x56\x93\xA5" + "\xF4\xAA\x67\x3C\x48\x37\xBD\x9A\x3C\xFE\xA5\x9A\xB0\xD1\x6B\x85" + "\xDD\x81\xD4\xFA\xAD\x31\x83\xA8\x22\x9B\xFD\xB4\x61\xDC\x7A\x51" + "\x59\x62\x10\x1B\x7E\x44\xA3\xFE\x90\x51\x5A\x3E\x02\x87\xAD\xFA" + "\xDD\x0B\x1F\x3D\x35\xAF\xEE\x13\x85\x51\xA7\x42\xC0\xEE\x9E\x20" + "\xE9\xD0\x29\xB2\xE4\x21\xE4\x6D\x62\xB9\xF4\x48\x4A\xD8\x46\x8E" + "\x61\xA6\x2C\x5D\xDF\x8F\x97\x2B\x3A\x75\x1D\x83\x17\x6F\xC6\xB0" + "\xDE\xFC\x14\x25\x06\x5A\x60\xBB\xB8\x21\x89\xD1\xEF\x57\xF1\x71" + "\x3D", + .m = "\x54\x85\x9b\x34\x2c\x49\xea\x2a", + .c = + "\x5c\xce\x9c\xd7\x9a\x9e\xa1\xfe\x7a\x82\x3c\x68\x27\x98\xe3\x5d" + "\xd5\xd7\x07\x29\xf5\xfb\xc3\x1a\x7f\x63\x1e\x62\x31\x3b\x19\x87" + "\x79\x4f\xec\x7b\xf3\xcb\xea\x9b\x95\x52\x3a\x40\xe5\x87\x7b\x72" + "\xd1\x72\xc9\xfb\x54\x63\xd8\xc9\xd7\x2c\xfc\x7b\xc3\x14\x1e\xbc" + "\x18\xb4\x34\xa1\xbf\x14\xb1\x37\x31\x6e\xf0\x1b\x35\x19\x54\x07" + "\xf7\x99\xec\x3e\x63\xe2\xcd\x61\x28\x65\xc3\xcd\xb1\x38\x36\xa5" + "\xb2\xd7\xb0\xdc\x1f\xf5\xef\x19\xc7\x53\x32\x2d\x1c\x26\xda\xe4" + "\x0d\xd6\x90\x7e\x28\xd8\xdc\xe4\x61\x05\xd2\x25\x90\x01\xd3\x96" + "\x6d\xa6\xcf\x58\x20\xbb\x03\xf4\x01\xbc\x79\xb9\x18\xd8\xb8\xba" + "\xbd\x93\xfc\xf2\x62\x5d\x8c\x66\x1e\x0e\x84\x59\x93\xdd\xe2\x93" + "\xa2\x62\x7d\x08\x82\x7a\xdd\xfc\xb8\xbc\xc5\x4f\x9c\x4e\xbf\xb4" + "\xfc\xf4\xc5\x01\xe8\x00\x70\x4d\x28\x26\xcc\x2e\xfe\x0e\x58\x41" + "\x8b\xec\xaf\x7c\x4b\x54\xd0\xa0\x64\xf9\x32\xf4\x2e\x47\x65\x0a" + "\x67\x88\x39\x3a\xdb\xb2\xdb\x7b\xb5\xf6\x17\xa8\xd9\xc6\x5e\x28" + "\x13\x82\x8a\x99\xdb\x60\x08\xa5\x23\x37\xfa\x88\x90\x31\xc8\x9d" + "\x8f\xec\xfb\x85\x9f\xb1\xce\xa6\x24\x50\x46\x44\x47\xcb\x65\xd1" + "\xdf\xc0\xb1\x6c\x90\x1f\x99\x8e\x4d\xd5\x9e\x31\x07\x66\x87\xdf" + "\x01\xaa\x56\x3c\x71\xe0\x2b\x6f\x67\x3b\x23\xed\xc2\xbd\x03\x30" + "\x79\x76\x02\x10\x10\x98\x85\x8a\xff\xfd\x0b\xda\xa5\xd9\x32\x48" + "\x02\xa0\x0b\xb9\x2a\x8a\x18\xca\xc6\x8f\x3f\xbb\x16\xb2\xaa\x98" + "\x27\xe3\x60\x43\xed\x15\x70\xd4\x57\x15\xfe\x19\xd4\x9b\x13\x78" + "\x8a\xf7\x21\xf1\xa2\xa2\x2d\xb3\x09\xcf\x44\x91\x6e\x08\x3a\x30" + "\x81\x3e\x90\x93\x8a\x67\x33\x00\x59\x54\x9a\x25\xd3\x49\x8e\x9f" + "\xc1\x4b\xe5\x86\xf3\x50\x4c\xbc\xc5\xd3\xf5\x3a\x54\xe1\x36\x3f" + "\xe2\x5a\xb4\x37\xc0\xeb\x70\x35\xec\xf6\xb7\xe8\x44\x3b\x7b\xf3" + "\xf1\xf2\x1e\xdb\x60\x7d\xd5\xbe\xf0\x71\x34\x90\x4c\xcb\xd4\x35" + "\x51\xc7\xdd\xd8\xc9\x81\xf5\x5d\x57\x46\x2c\xb1\x7b\x9b\xaa\xcb" + "\xd1\x22\x25\x49\x44\xa3\xd4\x6b\x29\x7b\xd8\xb2\x07\x93\xbf\x3d" + "\x52\x49\x84\x79\xef\xb8\xe5\xc4\xad\xca\xa8\xc6\xf6\xa6\x76\x70" + "\x5b\x0b\xe5\x83\xc6\x0e\xef\x55\xf2\xe7\xff\x04\xea\xe6\x13\xbe" + "\x40\xe1\x40\x45\x48\x66\x75\x31\xae\x35\x64\x91\x11\x6f\xda\xee" + "\x26\x86\x45\x6f\x0b\xd5\x9f\x03\xb1\x65\x5b\xdb\xa4\xe4\xf9\x45", + .key_len = 2349, + .m_size = 8, + .c_size = 512, + } +}; + +#define DH_TEST_VECTORS 2 + +struct kpp_testvec dh_tv_template[] = { + { + .secret = +#ifdef __LITTLE_ENDIAN + "\x01\x00" /* type */ + "\x11\x02" /* len */ + "\x00\x01\x00\x00" /* key_size */ + "\x00\x01\x00\x00" /* p_size */ + "\x01\x00\x00\x00" /* g_size */ +#else + "\x00\x01" /* type */ + "\x02\x11" /* len */ + "\x00\x00\x01\x00" /* key_size */ + "\x00\x00\x01\x00" /* p_size */ + "\x00\x00\x00\x01" /* g_size */ +#endif + /* xa */ + "\x44\xc1\x48\x36\xa7\x2b\x6f\x4e\x43\x03\x68\xad\x31\x00\xda\xf3" + 
"\x2a\x01\xa8\x32\x63\x5f\x89\x32\x1f\xdf\x4c\xa1\x6a\xbc\x10\x15" + "\x90\x35\xc9\x26\x41\xdf\x7b\xaa\x56\x56\x3d\x85\x44\xb5\xc0\x8e" + "\x37\x83\x06\x50\xb3\x5f\x0e\x28\x2c\xd5\x46\x15\xe3\xda\x7d\x74" + "\x87\x13\x91\x4f\xd4\x2d\xf6\xc7\x5e\x14\x2c\x11\xc2\x26\xb4\x3a" + "\xe3\xb2\x36\x20\x11\x3b\x22\xf2\x06\x65\x66\xe2\x57\x58\xf8\x22" + "\x1a\x94\xbd\x2b\x0e\x8c\x55\xad\x61\x23\x45\x2b\x19\x1e\x63\x3a" + "\x13\x61\xe3\xa0\x79\x70\x3e\x6d\x98\x32\xbc\x7f\x82\xc3\x11\xd8" + "\xeb\x53\xb5\xfc\xb5\xd5\x3c\x4a\xea\x92\x3e\x01\xce\x15\x65\xd4" + "\xaa\x85\xc1\x11\x90\x83\x31\x6e\xfe\xe7\x7f\x7d\xed\xab\xf9\x29" + "\xf8\xc7\xf1\x68\xc6\xb7\xe4\x1f\x2f\x28\xa0\xc9\x1a\x50\x64\x29" + "\x4b\x01\x6d\x1a\xda\x46\x63\x21\x07\x40\x8c\x8e\x4c\x6f\xb5\xe5" + "\x12\xf3\xc2\x1b\x48\x27\x5e\x27\x01\xb1\xaa\xed\x68\x9b\x83\x18" + "\x8f\xb1\xeb\x1f\x04\xd1\x3c\x79\xed\x4b\xf7\x0a\x33\xdc\xe0\xc6" + "\xd8\x02\x51\x59\x00\x74\x30\x07\x4c\x2d\xac\xe4\x13\xf1\x80\xf0" + "\xce\xfa\xff\xa9\xce\x29\x46\xdd\x9d\xad\xd1\xc3\xc6\x58\x1a\x63" + /* p */ + "\xb9\x36\x3a\xf1\x82\x1f\x60\xd3\x22\x47\xb8\xbc\x2d\x22\x6b\x81" + "\x7f\xe8\x20\x06\x09\x23\x73\x49\x9a\x59\x8b\x35\x25\xf8\x31\xbc" + "\x7d\xa8\x1c\x9d\x56\x0d\x1a\xf7\x4b\x4f\x96\xa4\x35\x77\x6a\x89" + "\xab\x42\x00\x49\x21\x71\xed\x28\x16\x1d\x87\x5a\x10\xa7\x9c\x64" + "\x94\xd4\x87\x3d\x28\xef\x44\xfe\x4b\xe2\xb4\x15\x8c\x82\xa6\xf3" + "\x50\x5f\xa8\xe8\xa2\x60\xe7\x00\x86\x78\x05\xd4\x78\x19\xa1\x98" + "\x62\x4e\x4a\x00\x78\x56\x96\xe6\xcf\xd7\x10\x1b\x74\x5d\xd0\x26" + "\x61\xdb\x6b\x32\x09\x51\xd8\xa5\xfd\x54\x16\x71\x01\xb3\x39\xe6" + "\x4e\x69\xb1\xd7\x06\x8f\xd6\x1e\xdc\x72\x25\x26\x74\xc8\x41\x06" + "\x5c\xd1\x26\x5c\xb0\x2f\xf9\x59\x13\xc1\x2a\x0f\x78\xea\x7b\xf7" + "\xbd\x59\xa0\x90\x1d\xfc\x33\x5b\x4c\xbf\x05\x9c\x3a\x3f\x69\xa2" + "\x45\x61\x4e\x10\x6a\xb3\x17\xc5\x68\x30\xfb\x07\x5f\x34\xc6\xfb" + "\x73\x07\x3c\x70\xf6\xae\xe7\x72\x84\xc3\x18\x81\x8f\xe8\x11\x1f" + "\x3d\x83\x83\x01\x2a\x14\x73\xbf\x32\x32\x2e\xc9\x4d\xdb\x2a\xca" + "\xee\x71\xf9\xda\xad\xe8\x82\x0b\x4d\x0c\x1f\xb6\x1d\xef\x00\x67" + "\x74\x3d\x95\xe0\xb7\xc4\x30\x8a\x24\x87\x12\x47\x27\x70\x0d\x73" + /* g */ + "\x02", + .b_public = + "\x2a\x67\x5c\xfd\x63\x5d\xc0\x97\x0a\x8b\xa2\x1f\xf8\x8a\xcb\x54" + "\xca\x2f\xd3\x49\x3f\x01\x8e\x87\xfe\xcc\x94\xa0\x3e\xd4\x26\x79" + "\x9a\x94\x3c\x11\x81\x58\x5c\x60\x3d\xf5\x98\x90\x89\x64\x62\x1f" + "\xbd\x05\x6d\x2b\xcd\x84\x40\x9b\x4a\x1f\xe0\x19\xf1\xca\x20\xb3" + "\x4e\xa0\x4f\x15\xcc\xa5\xfe\xa5\xb4\xf5\x0b\x18\x7a\x5a\x37\xaa" + "\x58\x00\x19\x7f\xe2\xa3\xd9\x1c\x44\x57\xcc\xde\x2e\xc1\x38\xea" + "\xeb\xe3\x90\x40\xc4\x6c\xf7\xcd\xe9\x22\x50\x71\xf5\x7c\xdb\x37" + "\x0e\x80\xc3\xed\x7e\xb1\x2b\x2f\xbe\x71\xa6\x11\xa5\x9d\xf5\x39" + "\xf1\xa2\xe5\x85\xbc\x25\x91\x4e\x84\x8d\x26\x9f\x4f\xe6\x0f\xa6" + "\x2b\x6b\xf9\x0d\xaf\x6f\xbb\xfa\x2d\x79\x15\x31\x57\xae\x19\x60" + "\x22\x0a\xf5\xfd\x98\x0e\xbf\x5d\x49\x75\x58\x37\xbc\x7f\xf5\x21" + "\x56\x1e\xd5\xb3\x50\x0b\xca\x96\xf3\xd1\x3f\xb3\x70\xa8\x6d\x63" + "\x48\xfb\x3d\xd7\x29\x91\x45\xb5\x48\xcd\xb6\x78\x30\xf2\x3f\x1e" + "\xd6\x22\xd6\x35\x9b\xf9\x1f\x85\xae\xab\x4b\xd7\xe0\xc7\x86\x67" + "\x3f\x05\x7f\xa6\x0d\x2f\x0d\xbf\x53\x5f\x4d\x2c\x6d\x5e\x57\x40" + "\x30\x3a\x23\x98\xf9\xb4\x32\xf5\x32\x83\xdd\x0b\xae\x33\x97\x2f", + .expected_a_public = + "\x5c\x24\xdf\xeb\x5b\x4b\xf8\xc5\xef\x39\x48\x82\xe0\x1e\x62\xee" + "\x8a\xae\xdf\x93\x6c\x2b\x16\x95\x92\x16\x3f\x16\x7b\x75\x03\x85" + "\xd9\xf1\x69\xc2\x14\x87\x45\xfc\xa4\x19\xf6\xf0\xa4\xf3\xec\xd4" + 
"\x6c\x5c\x03\x3b\x94\xc2\x2f\x92\xe4\xce\xb3\xe4\x72\xe8\x17\xe6" + "\x23\x7e\x00\x01\x09\x59\x13\xbf\xc1\x2f\x99\xa9\x07\xaa\x02\x23" + "\x4a\xca\x39\x4f\xbc\xec\x0f\x27\x4f\x19\x93\x6c\xb9\x30\x52\xfd" + "\x2b\x9d\x86\xf1\x06\x1e\xb6\x56\x27\x4a\xc9\x8a\xa7\x8a\x48\x5e" + "\xb5\x60\xcb\xdf\xff\x03\x26\x10\xbf\x90\x8f\x46\x60\xeb\x9b\x9a" + "\xd6\x6f\x44\x91\x03\x92\x18\x2c\x96\x5e\x40\x19\xfb\xf4\x4f\x3a" + "\x02\x7b\xaf\xcc\x22\x20\x79\xb9\xf8\x9f\x8f\x85\x6b\xec\x44\xbb" + "\xe6\xa8\x8e\xb1\xe8\x2c\xee\x64\xee\xf8\xbd\x00\xf3\xe2\x2b\x93" + "\xcd\xe7\xc4\xdf\xc9\x19\x46\xfe\xb6\x07\x73\xc1\x8a\x64\x79\x26" + "\xe7\x30\xad\x2a\xdf\xe6\x8f\x59\xf5\x81\xbf\x4a\x29\x91\xe7\xb7" + "\xcf\x48\x13\x27\x75\x79\x40\xd9\xd6\x32\x52\x4e\x6a\x86\xae\x6f" + "\xc2\xbf\xec\x1f\xc2\x69\xb2\xb6\x59\xe5\xa5\x17\xa4\x77\xb7\x62" + "\x46\xde\xe8\xd2\x89\x78\x9a\xef\xa3\xb5\x8f\x26\xec\x80\xda\x39", + .expected_ss = + "\x8f\xf3\xac\xa2\xea\x22\x11\x5c\x45\x65\x1a\x77\x75\x2e\xcf\x46" + "\x23\x14\x1e\x67\x53\x4d\x35\xb0\x38\x1d\x4e\xb9\x41\x9a\x21\x24" + "\x6e\x9f\x40\xfe\x90\x51\xb1\x06\xa4\x7b\x87\x17\x2f\xe7\x5e\x22" + "\xf0\x7b\x54\x84\x0a\xac\x0a\x90\xd2\xd7\xe8\x7f\xe7\xe3\x30\x75" + "\x01\x1f\x24\x75\x56\xbe\xcc\x8d\x1e\x68\x0c\x41\x72\xd3\xfa\xbb" + "\xe5\x9c\x60\xc7\x28\x77\x0c\xbe\x89\xab\x08\xd6\x21\xe7\x2e\x1a" + "\x58\x7a\xca\x4f\x22\xf3\x2b\x30\xfd\xf4\x98\xc1\xa3\xf8\xf6\xcc" + "\xa9\xe4\xdb\x5b\xee\xd5\x5c\x6f\x62\x4c\xd1\x1a\x02\x2a\x23\xe4" + "\xb5\x57\xf3\xf9\xec\x04\x83\x54\xfe\x08\x5e\x35\xac\xfb\xa8\x09" + "\x82\x32\x60\x11\xb2\x16\x62\x6b\xdf\xda\xde\x9c\xcb\x63\x44\x6c" + "\x59\x26\x6a\x8f\xb0\x24\xcb\xa6\x72\x48\x1e\xeb\xe0\xe1\x09\x44" + "\xdd\xee\x66\x6d\x84\xcf\xa5\xc1\xb8\x36\x74\xd3\x15\x96\xc3\xe4" + "\xc6\x5a\x4d\x23\x97\x0c\x5c\xcb\xa9\xf5\x29\xc2\x0e\xff\x93\x82" + "\xd3\x34\x49\xad\x64\xa6\xb1\xc0\x59\x28\x75\x60\xa7\x8a\xb0\x11" + "\x56\x89\x42\x74\x11\xf5\xf6\x5e\x6f\x16\x54\x6a\xb1\x76\x4d\x50" + "\x8a\x68\xc1\x5b\x82\xb9\x0d\x00\x32\x50\xed\x88\x87\x48\x92\x17", + .secret_size = 529, + .b_public_size = 256, + .expected_a_public_size = 256, + .expected_ss_size = 256, + }, + { + .secret = +#ifdef __LITTLE_ENDIAN + "\x01\x00" /* type */ + "\x11\x02" /* len */ + "\x00\x01\x00\x00" /* key_size */ + "\x00\x01\x00\x00" /* p_size */ + "\x01\x00\x00\x00" /* g_size */ +#else + "\x00\x01" /* type */ + "\x02\x11" /* len */ + "\x00\x00\x01\x00" /* key_size */ + "\x00\x00\x01\x00" /* p_size */ + "\x00\x00\x00\x01" /* g_size */ +#endif + /* xa */ + "\x4d\x75\xa8\x6e\xba\x23\x3a\x0c\x63\x56\xc8\xc9\x5a\xa7\xd6\x0e" + "\xed\xae\x40\x78\x87\x47\x5f\xe0\xa7\x7b\xba\x84\x88\x67\x4e\xe5" + "\x3c\xcc\x5c\x6a\xe7\x4a\x20\xec\xbe\xcb\xf5\x52\x62\x9f\x37\x80" + "\x0c\x72\x7b\x83\x66\xa4\xf6\x7f\x95\x97\x1c\x6a\x5c\x7e\xf1\x67" + "\x37\xb3\x93\x39\x3d\x0b\x55\x35\xd9\xe5\x22\x04\x9f\xf8\xc1\x04" + "\xce\x13\xa5\xac\xe1\x75\x05\xd1\x2b\x53\xa2\x84\xef\xb1\x18\xf4" + "\x66\xdd\xea\xe6\x24\x69\x5a\x49\xe0\x7a\xd8\xdf\x1b\xb7\xf1\x6d" + "\x9b\x50\x2c\xc8\x1c\x1c\xa3\xb4\x37\xfb\x66\x3f\x67\x71\x73\xa9" + "\xff\x5f\xd9\xa2\x25\x6e\x25\x1b\x26\x54\xbf\x0c\xc6\xdb\xea\x0a" + "\x52\x6c\x16\x7c\x27\x68\x15\x71\x58\x73\x9d\xe6\xc2\x80\xaa\x97" + "\x31\x66\xfb\xa6\xfb\xfd\xd0\x9c\x1d\xbe\x81\x48\xf5\x9a\x32\xf1" + "\x69\x62\x18\x78\xae\x72\x36\xe6\x94\x27\xd1\xff\x18\x4f\x28\x6a" + "\x16\xbd\x6a\x60\xee\xe5\xf9\x6d\x16\xe4\xb8\xa6\x41\x9b\x23\x7e" + "\xf7\x9d\xd1\x1d\x03\x15\x66\x3a\xcf\xb6\x2c\x13\x96\x2c\x52\x21" + "\xe4\x2d\x48\x7a\x8a\x5d\xb2\x88\xed\x98\x61\x79\x8b\x6a\x1e\x5f" + 
"\xd0\x8a\x2d\x99\x5a\x2b\x0f\xbc\xef\x53\x8f\x32\xc1\xa2\x99\x26" + /* p */ + "\xb9\x36\x3a\xf1\x82\x1f\x60\xd3\x22\x47\xb8\xbc\x2d\x22\x6b\x81" + "\x7f\xe8\x20\x06\x09\x23\x73\x49\x9a\x59\x8b\x35\x25\xf8\x31\xbc" + "\x7d\xa8\x1c\x9d\x56\x0d\x1a\xf7\x4b\x4f\x96\xa4\x35\x77\x6a\x89" + "\xab\x42\x00\x49\x21\x71\xed\x28\x16\x1d\x87\x5a\x10\xa7\x9c\x64" + "\x94\xd4\x87\x3d\x28\xef\x44\xfe\x4b\xe2\xb4\x15\x8c\x82\xa6\xf3" + "\x50\x5f\xa8\xe8\xa2\x60\xe7\x00\x86\x78\x05\xd4\x78\x19\xa1\x98" + "\x62\x4e\x4a\x00\x78\x56\x96\xe6\xcf\xd7\x10\x1b\x74\x5d\xd0\x26" + "\x61\xdb\x6b\x32\x09\x51\xd8\xa5\xfd\x54\x16\x71\x01\xb3\x39\xe6" + "\x4e\x69\xb1\xd7\x06\x8f\xd6\x1e\xdc\x72\x25\x26\x74\xc8\x41\x06" + "\x5c\xd1\x26\x5c\xb0\x2f\xf9\x59\x13\xc1\x2a\x0f\x78\xea\x7b\xf7" + "\xbd\x59\xa0\x90\x1d\xfc\x33\x5b\x4c\xbf\x05\x9c\x3a\x3f\x69\xa2" + "\x45\x61\x4e\x10\x6a\xb3\x17\xc5\x68\x30\xfb\x07\x5f\x34\xc6\xfb" + "\x73\x07\x3c\x70\xf6\xae\xe7\x72\x84\xc3\x18\x81\x8f\xe8\x11\x1f" + "\x3d\x83\x83\x01\x2a\x14\x73\xbf\x32\x32\x2e\xc9\x4d\xdb\x2a\xca" + "\xee\x71\xf9\xda\xad\xe8\x82\x0b\x4d\x0c\x1f\xb6\x1d\xef\x00\x67" + "\x74\x3d\x95\xe0\xb7\xc4\x30\x8a\x24\x87\x12\x47\x27\x70\x0d\x73" + /* g */ + "\x02", + .b_public = + "\x99\x4d\xd9\x01\x84\x8e\x4a\x5b\xb8\xa5\x64\x8c\x6c\x00\x5c\x0e" + "\x1e\x1b\xee\x5d\x9f\x53\xe3\x16\x70\x01\xed\xbf\x4f\x14\x36\x6e" + "\xe4\x43\x45\x43\x49\xcc\xb1\xb0\x2a\xc0\x6f\x22\x55\x42\x17\x94" + "\x18\x83\xd7\x2a\x5c\x51\x54\xf8\x4e\x7c\x10\xda\x76\x68\x57\x77" + "\x1e\x62\x03\x30\x04\x7b\x4c\x39\x9c\x54\x01\x54\xec\xef\xb3\x55" + "\xa4\xc0\x24\x6d\x3d\xbd\xcc\x46\x5b\x00\x96\xc7\xea\x93\xd1\x3f" + "\xf2\x6a\x72\xe3\xf2\xc1\x92\x24\x5b\xda\x48\x70\x2c\xa9\x59\x97" + "\x19\xb1\xd6\x54\xb3\x9c\x2e\xb0\x63\x07\x9b\x5e\xac\xb5\xf2\xb1" + "\x5b\xf8\xf3\xd7\x2d\x37\x9b\x68\x6c\xf8\x90\x07\xbc\x37\x9a\xa5" + "\xe2\x91\x12\x25\x47\x77\xe3\x3d\xb2\x95\x69\x44\x0b\x91\x1e\xaf" + "\x7c\x8c\x7c\x34\x41\x6a\xab\x60\x6e\xc6\x52\xec\x7e\x94\x0a\x37" + "\xec\x98\x90\xdf\x3f\x02\xbd\x23\x52\xdd\xd9\xe5\x31\x80\x74\x25" + "\xb6\xd2\xd3\xcc\xd5\xcc\x6d\xf9\x7e\x4d\x78\xab\x77\x51\xfa\x77" + "\x19\x94\x49\x8c\x05\xd4\x75\xed\xd2\xb3\x64\x57\xe0\x52\x99\xc0" + "\x83\xe3\xbb\x5e\x2b\xf1\xd2\xc0\xb1\x37\x36\x0b\x7c\xb5\x63\x96" + "\x8e\xde\x04\x23\x11\x95\x62\x11\x9a\xce\x6f\x63\xc8\xd5\xd1\x8f", + .expected_a_public = + "\x90\x89\xe4\x82\xd6\x0a\xcf\x1a\xae\xce\x1b\x66\xa7\x19\x71\x18" + "\x8f\x95\x4b\x5b\x80\x45\x4a\x5a\x43\x99\x4d\x37\xcf\xa3\xa7\x28" + "\x9c\xc7\x73\xf1\xb2\x17\xf6\x99\xe3\x6b\x56\xcb\x3e\x35\x60\x7d" + "\x65\xc7\x84\x6b\x3e\x60\xee\xcd\xd2\x70\xe7\xc9\x32\x1c\xf0\xb4" + "\xf9\x52\xd9\x88\x75\xfd\x40\x2c\xa7\xbe\x19\x1c\x0a\xae\x93\xe1" + "\x71\xc7\xcd\x4f\x33\x5c\x10\x7d\x39\x56\xfc\x73\x84\xb2\x67\xc3" + "\x77\x26\x20\x97\x2b\xf8\x13\x43\x93\x9c\x9a\xa4\x08\xc7\x34\x83" + "\xe6\x98\x61\xe7\x16\x30\x2c\xb1\xdb\x2a\xb2\xcc\xc3\x02\xa5\x3c" + "\x71\x50\x14\x83\xc7\xbb\xa4\xbe\x98\x1b\xfe\xcb\x43\xe9\x97\x62" + "\xd6\xf0\x8c\xcb\x1c\xba\x1e\xa8\xa6\xa6\x50\xfc\x85\x7d\x47\xbf" + "\xf4\x3e\x23\xd3\x5f\xb2\x71\x3e\x40\x94\xaa\x87\x83\x2c\x6c\x8e" + "\x60\xfd\xdd\xf7\xf4\x76\x03\xd3\x1d\xec\x18\x51\xa3\xf2\x44\x1a" + "\x3f\xb4\x7c\x18\x0d\x68\x65\x92\x54\x0d\x2d\x81\x16\xf1\x84\x66" + "\x89\x92\xd0\x1a\x5e\x1f\x42\x46\x5b\xe5\x83\x86\x80\xd9\xcd\x3a" + "\x5a\x2f\xb9\x59\x9b\xe4\x43\x84\x64\xf3\x09\x1a\x0a\xa2\x64\x0f" + "\x77\x4e\x8d\x8b\xe6\x88\xd1\xfc\xaf\x8f\xdf\x1d\xbc\x31\xb3\xbd", + .expected_ss = + "\x34\xc3\x35\x14\x88\x46\x26\x23\x97\xbb\xdd\x28\x5c\x94\xf6\x47" + 
"\xca\xb3\x19\xaf\xca\x44\x9b\xc2\x7d\x89\xfd\x96\x14\xfd\x6d\x58" + "\xd8\xc4\x6b\x61\x2a\x0d\xf2\x36\x45\xc8\xe4\xa4\xed\x81\x53\x81" + "\x66\x1e\xe0\x5a\xb1\x78\x2d\x0b\x5c\xb4\xd1\xfc\x90\xc6\x9c\xdb" + "\x5a\x30\x0b\x14\x7d\xbe\xb3\x7d\xb1\xb2\x76\x3c\x6c\xef\x74\x6b" + "\xe7\x1f\x64\x0c\xab\x65\xe1\x76\x5c\x3d\x83\xb5\x8a\xfb\xaf\x0f" + "\xf2\x06\x14\x8f\xa0\xf6\xc1\x89\x78\xf2\xba\x72\x73\x3c\xf7\x76" + "\x21\x67\xbc\x24\x31\xb8\x09\x65\x0f\x0c\x02\x32\x4a\x98\x14\xfc" + "\x72\x2c\x25\x60\x68\x5f\x2f\x30\x1e\x5b\xf0\x3b\xd1\xa2\x87\xa0" + "\x54\xdf\xdb\xc0\xee\x0a\x0f\x47\xc9\x90\x20\x2c\xf9\xe3\x52\xad" + "\x27\x65\x8d\x54\x8d\xa8\xa1\xf3\xed\x15\xd4\x94\x28\x90\x31\x93" + "\x1b\xc0\x51\xbb\x43\x5d\x76\x3b\x1d\x2a\x71\x50\xea\x5d\x48\x94" + "\x7f\x6f\xf1\x48\xdb\x30\xe5\xae\x64\x79\xd9\x7a\xdb\xc6\xff\xd8" + "\x5e\x5a\x64\xbd\xf6\x85\x04\xe8\x28\x6a\xac\xef\xce\x19\x8e\x9a" + "\xfe\x75\xc0\x27\x69\xe3\xb3\x7b\x21\xa7\xb1\x16\xa4\x85\x23\xee" + "\xb0\x1b\x04\x6e\xbd\xab\x16\xde\xfd\x86\x6b\xa9\x95\xd7\x0b\xfd", + .secret_size = 529, + .b_public_size = 256, + .expected_a_public_size = 256, + .expected_ss_size = 256, + } +}; + +#ifdef CONFIG_CRYPTO_FIPS +#define ECDH_TEST_VECTORS 1 +#else +#define ECDH_TEST_VECTORS 2 +#endif +struct kpp_testvec ecdh_tv_template[] = { + { +#ifndef CONFIG_CRYPTO_FIPS + .secret = +#ifdef __LITTLE_ENDIAN + "\x02\x00" /* type */ + "\x20\x00" /* len */ + "\x01\x00" /* curve_id */ + "\x18\x00" /* key_size */ +#else + "\x00\x02" /* type */ + "\x00\x20" /* len */ + "\x00\x01" /* curve_id */ + "\x00\x18" /* key_size */ +#endif + "\xb5\x05\xb1\x71\x1e\xbf\x8c\xda" + "\x4e\x19\x1e\x62\x1f\x23\x23\x31" + "\x36\x1e\xd3\x84\x2f\xcc\x21\x72", + .b_public = + "\xc3\xba\x67\x4b\x71\xec\xd0\x76" + "\x7a\x99\x75\x64\x36\x13\x9a\x94" + "\x5d\x8b\xdc\x60\x90\x91\xfd\x3f" + "\xb0\x1f\x8a\x0a\x68\xc6\x88\x6e" + "\x83\x87\xdd\x67\x09\xf8\x8d\x96" + "\x07\xd6\xbd\x1c\xe6\x8d\x9d\x67", + .expected_a_public = + "\x1a\x04\xdb\xa5\xe1\xdd\x4e\x79" + "\xa3\xe6\xef\x0e\x5c\x80\x49\x85" + "\xfa\x78\xb4\xef\x49\xbd\x4c\x7c" + "\x22\x90\x21\x02\xf9\x1b\x81\x5d" + "\x0c\x8a\xa8\x98\xd6\x27\x69\x88" + "\x5e\xbc\x94\xd8\x15\x9e\x21\xce", + .expected_ss = + "\xf4\x57\xcc\x4f\x1f\x4e\x31\xcc" + "\xe3\x40\x60\xc8\x06\x93\xc6\x2e" + "\x99\x80\x81\x28\xaf\xc5\x51\x74", + .secret_size = 32, + .b_public_size = 48, + .expected_a_public_size = 48, + .expected_ss_size = 24 + }, { +#endif + .secret = +#ifdef __LITTLE_ENDIAN + "\x02\x00" /* type */ + "\x28\x00" /* len */ + "\x02\x00" /* curve_id */ + "\x20\x00" /* key_size */ +#else + "\x00\x02" /* type */ + "\x00\x28" /* len */ + "\x00\x02" /* curve_id */ + "\x00\x20" /* key_size */ +#endif + "\x24\xd1\x21\xeb\xe5\xcf\x2d\x83" + "\xf6\x62\x1b\x6e\x43\x84\x3a\xa3" + "\x8b\xe0\x86\xc3\x20\x19\xda\x92" + "\x50\x53\x03\xe1\xc0\xea\xb8\x82", + .expected_a_public = + "\x1a\x7f\xeb\x52\x00\xbd\x3c\x31" + "\x7d\xb6\x70\xc1\x86\xa6\xc7\xc4" + "\x3b\xc5\x5f\x6c\x6f\x58\x3c\xf5" + "\xb6\x63\x82\x77\x33\x24\xa1\x5f" + "\x6a\xca\x43\x6f\xf7\x7e\xff\x02" + "\x37\x08\xcc\x40\x5e\x7a\xfd\x6a" + "\x6a\x02\x6e\x41\x87\x68\x38\x77" + "\xfa\xa9\x44\x43\x2d\xef\x09\xdf", + .expected_ss = + "\xea\x17\x6f\x7e\x6e\x57\x26\x38" + "\x8b\xfb\x41\xeb\xba\xc8\x6d\xa5" + "\xa8\x72\xd1\xff\xc9\x47\x3d\xaa" + "\x58\x43\x9f\x34\x0f\x8c\xf3\xc9", + .b_public = + "\xcc\xb4\xda\x74\xb1\x47\x3f\xea" + "\x6c\x70\x9e\x38\x2d\xc7\xaa\xb7" + "\x29\xb2\x47\x03\x19\xab\xdd\x34" + "\xbd\xa8\x2c\x93\xe1\xa4\x74\xd9" + "\x64\x63\xf7\x70\x20\x2f\xa4\xe6" + "\x9f\x4a\x38\xcc\xc0\x2c\x49\x2f" + 
"\xb1\x32\xbb\xaf\x22\x61\xda\xcb" + "\x6f\xdb\xa9\xaa\xfc\x77\x81\xf3", + .secret_size = 40, + .b_public_size = 64, + .expected_a_public_size = 64, + .expected_ss_size = 32 } }; @@ -376,6 +897,131 @@ static struct hash_testvec md4_tv_template [] = { }, }; +#define SHA3_224_TEST_VECTORS 3 +static struct hash_testvec sha3_224_tv_template[] = { + { + .plaintext = "", + .digest = "\x6b\x4e\x03\x42\x36\x67\xdb\xb7" + "\x3b\x6e\x15\x45\x4f\x0e\xb1\xab" + "\xd4\x59\x7f\x9a\x1b\x07\x8e\x3f" + "\x5b\x5a\x6b\xc7", + }, { + .plaintext = "a", + .psize = 1, + .digest = "\x9e\x86\xff\x69\x55\x7c\xa9\x5f" + "\x40\x5f\x08\x12\x69\x68\x5b\x38" + "\xe3\xa8\x19\xb3\x09\xee\x94\x2f" + "\x48\x2b\x6a\x8b", + }, { + .plaintext = "abcdbcdecdefdefgefghfghighijhijkijkl" + "jklmklmnlmnomnopnopq", + .psize = 56, + .digest = "\x8a\x24\x10\x8b\x15\x4a\xda\x21" + "\xc9\xfd\x55\x74\x49\x44\x79\xba" + "\x5c\x7e\x7a\xb7\x6e\xf2\x64\xea" + "\xd0\xfc\xce\x33", + }, +}; + +#define SHA3_256_TEST_VECTORS 3 +static struct hash_testvec sha3_256_tv_template[] = { + { + .plaintext = "", + .digest = "\xa7\xff\xc6\xf8\xbf\x1e\xd7\x66" + "\x51\xc1\x47\x56\xa0\x61\xd6\x62" + "\xf5\x80\xff\x4d\xe4\x3b\x49\xfa" + "\x82\xd8\x0a\x4b\x80\xf8\x43\x4a", + }, { + .plaintext = "a", + .psize = 1, + .digest = "\x80\x08\x4b\xf2\xfb\xa0\x24\x75" + "\x72\x6f\xeb\x2c\xab\x2d\x82\x15" + "\xea\xb1\x4b\xc6\xbd\xd8\xbf\xb2" + "\xc8\x15\x12\x57\x03\x2e\xcd\x8b", + }, { + .plaintext = "abcdbcdecdefdefgefghfghighijhijkijkl" + "jklmklmnlmnomnopnopq", + .psize = 56, + .digest = "\x41\xc0\xdb\xa2\xa9\xd6\x24\x08" + "\x49\x10\x03\x76\xa8\x23\x5e\x2c" + "\x82\xe1\xb9\x99\x8a\x99\x9e\x21" + "\xdb\x32\xdd\x97\x49\x6d\x33\x76", + }, +}; + + +#define SHA3_384_TEST_VECTORS 3 +static struct hash_testvec sha3_384_tv_template[] = { + { + .plaintext = "", + .digest = "\x0c\x63\xa7\x5b\x84\x5e\x4f\x7d" + "\x01\x10\x7d\x85\x2e\x4c\x24\x85" + "\xc5\x1a\x50\xaa\xaa\x94\xfc\x61" + "\x99\x5e\x71\xbb\xee\x98\x3a\x2a" + "\xc3\x71\x38\x31\x26\x4a\xdb\x47" + "\xfb\x6b\xd1\xe0\x58\xd5\xf0\x04", + }, { + .plaintext = "a", + .psize = 1, + .digest = "\x18\x15\xf7\x74\xf3\x20\x49\x1b" + "\x48\x56\x9e\xfe\xc7\x94\xd2\x49" + "\xee\xb5\x9a\xae\x46\xd2\x2b\xf7" + "\x7d\xaf\xe2\x5c\x5e\xdc\x28\xd7" + "\xea\x44\xf9\x3e\xe1\x23\x4a\xa8" + "\x8f\x61\xc9\x19\x12\xa4\xcc\xd9", + }, { + .plaintext = "abcdbcdecdefdefgefghfghighijhijkijkl" + "jklmklmnlmnomnopnopq", + .psize = 56, + .digest = "\x99\x1c\x66\x57\x55\xeb\x3a\x4b" + "\x6b\xbd\xfb\x75\xc7\x8a\x49\x2e" + "\x8c\x56\xa2\x2c\x5c\x4d\x7e\x42" + "\x9b\xfd\xbc\x32\xb9\xd4\xad\x5a" + "\xa0\x4a\x1f\x07\x6e\x62\xfe\xa1" + "\x9e\xef\x51\xac\xd0\x65\x7c\x22", + }, +}; + + +#define SHA3_512_TEST_VECTORS 3 +static struct hash_testvec sha3_512_tv_template[] = { + { + .plaintext = "", + .digest = "\xa6\x9f\x73\xcc\xa2\x3a\x9a\xc5" + "\xc8\xb5\x67\xdc\x18\x5a\x75\x6e" + "\x97\xc9\x82\x16\x4f\xe2\x58\x59" + "\xe0\xd1\xdc\xc1\x47\x5c\x80\xa6" + "\x15\xb2\x12\x3a\xf1\xf5\xf9\x4c" + "\x11\xe3\xe9\x40\x2c\x3a\xc5\x58" + "\xf5\x00\x19\x9d\x95\xb6\xd3\xe3" + "\x01\x75\x85\x86\x28\x1d\xcd\x26", + }, { + .plaintext = "a", + .psize = 1, + .digest = "\x69\x7f\x2d\x85\x61\x72\xcb\x83" + "\x09\xd6\xb8\xb9\x7d\xac\x4d\xe3" + "\x44\xb5\x49\xd4\xde\xe6\x1e\xdf" + "\xb4\x96\x2d\x86\x98\xb7\xfa\x80" + "\x3f\x4f\x93\xff\x24\x39\x35\x86" + "\xe2\x8b\x5b\x95\x7a\xc3\xd1\xd3" + "\x69\x42\x0c\xe5\x33\x32\x71\x2f" + "\x99\x7b\xd3\x36\xd0\x9a\xb0\x2a", + }, { + .plaintext = "abcdbcdecdefdefgefghfghighijhijkijkl" + "jklmklmnlmnomnopnopq", + .psize = 56, + .digest = 
"\x04\xa3\x71\xe8\x4e\xcf\xb5\xb8" + "\xb7\x7c\xb4\x86\x10\xfc\xa8\x18" + "\x2d\xd4\x57\xce\x6f\x32\x6a\x0f" + "\xd3\xd7\xec\x2f\x1e\x91\x63\x6d" + "\xee\x69\x1f\xbe\x0c\x98\x53\x02" + "\xba\x1b\x0d\x8d\xc7\x8c\x08\x63" + "\x46\xb5\x33\xb4\x9c\x03\x0d\x99" + "\xa2\x7d\xaf\x11\x39\xd6\xe7\x5e", + }, +}; + + /* * MD5 test vectors from RFC1321 */ @@ -3246,6 +3892,394 @@ static struct hash_testvec hmac_sha512_tv_template[] = { }, }; +#define HMAC_SHA3_224_TEST_VECTORS 4 + +static struct hash_testvec hmac_sha3_224_tv_template[] = { + { + .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + .ksize = 20, + .plaintext = "Hi There", + .psize = 8, + .digest = "\x3b\x16\x54\x6b\xbc\x7b\xe2\x70" + "\x6a\x03\x1d\xca\xfd\x56\x37\x3d" + "\x98\x84\x36\x76\x41\xd8\xc5\x9a" + "\xf3\xc8\x60\xf7", + }, { + .key = "Jefe", + .ksize = 4, + .plaintext = "what do ya want for nothing?", + .psize = 28, + .digest = "\x7f\xdb\x8d\xd8\x8b\xd2\xf6\x0d" + "\x1b\x79\x86\x34\xad\x38\x68\x11" + "\xc2\xcf\xc8\x5b\xfa\xf5\xd5\x2b" + "\xba\xce\x5e\x66", + .np = 4, + .tap = { 7, 7, 7, 7 } + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = "Test Using Large" + "r Than Block-Siz" + "e Key - Hash Key" + " First", + .psize = 54, + .digest = "\xb4\xa1\xf0\x4c\x00\x28\x7a\x9b" + "\x7f\x60\x75\xb3\x13\xd2\x79\xb8" + "\x33\xbc\x8f\x75\x12\x43\x52\xd0" + "\x5f\xb9\x99\x5f", + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = + "This is a test u" + "sing a larger th" + "an block-size ke" + "y and a larger t" + "han block-size d" + "ata. 
The key nee" + "ds to be hashed " + "before being use" + "d by the HMAC al" + "gorithm.", + .psize = 152, + .digest = "\x05\xd8\xcd\x6d\x00\xfa\xea\x8d" + "\x1e\xb6\x8a\xde\x28\x73\x0b\xbd" + "\x3c\xba\xb6\x92\x9f\x0a\x08\x6b" + "\x29\xcd\x62\xa0", + }, +}; + +#define HMAC_SHA3_256_TEST_VECTORS 4 + +static struct hash_testvec hmac_sha3_256_tv_template[] = { + { + .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + .ksize = 20, + .plaintext = "Hi There", + .psize = 8, + .digest = "\xba\x85\x19\x23\x10\xdf\xfa\x96" + "\xe2\xa3\xa4\x0e\x69\x77\x43\x51" + "\x14\x0b\xb7\x18\x5e\x12\x02\xcd" + "\xcc\x91\x75\x89\xf9\x5e\x16\xbb", + }, { + .key = "Jefe", + .ksize = 4, + .plaintext = "what do ya want for nothing?", + .psize = 28, + .digest = "\xc7\xd4\x07\x2e\x78\x88\x77\xae" + "\x35\x96\xbb\xb0\xda\x73\xb8\x87" + "\xc9\x17\x1f\x93\x09\x5b\x29\x4a" + "\xe8\x57\xfb\xe2\x64\x5e\x1b\xa5", + .np = 4, + .tap = { 7, 7, 7, 7 } + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = "Test Using Large" + "r Than Block-Siz" + "e Key - Hash Key" + " First", + .psize = 54, + .digest = "\xed\x73\xa3\x74\xb9\x6c\x00\x52" + "\x35\xf9\x48\x03\x2f\x09\x67\x4a" + "\x58\xc0\xce\x55\x5c\xfc\x1f\x22" + "\x3b\x02\x35\x65\x60\x31\x2c\x3b", + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = + "This is a test u" + "sing a larger th" + "an block-size ke" + "y and a larger t" + "han block-size d" + "ata. 
The key nee" + "ds to be hashed " + "before being use" + "d by the HMAC al" + "gorithm.", + .psize = 152, + .digest = "\x65\xc5\xb0\x6d\x4c\x3d\xe3\x2a" + "\x7a\xef\x87\x63\x26\x1e\x49\xad" + "\xb6\xe2\x29\x3e\xc8\xe7\xc6\x1e" + "\x8d\xe6\x17\x01\xfc\x63\xe1\x23", + }, +}; + +#define HMAC_SHA3_384_TEST_VECTORS 4 + +static struct hash_testvec hmac_sha3_384_tv_template[] = { + { + .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + .ksize = 20, + .plaintext = "Hi There", + .psize = 8, + .digest = "\x68\xd2\xdc\xf7\xfd\x4d\xdd\x0a" + "\x22\x40\xc8\xa4\x37\x30\x5f\x61" + "\xfb\x73\x34\xcf\xb5\xd0\x22\x6e" + "\x1b\xc2\x7d\xc1\x0a\x2e\x72\x3a" + "\x20\xd3\x70\xb4\x77\x43\x13\x0e" + "\x26\xac\x7e\x3d\x53\x28\x86\xbd", + }, { + .key = "Jefe", + .ksize = 4, + .plaintext = "what do ya want for nothing?", + .psize = 28, + .digest = "\xf1\x10\x1f\x8c\xbf\x97\x66\xfd" + "\x67\x64\xd2\xed\x61\x90\x3f\x21" + "\xca\x9b\x18\xf5\x7c\xf3\xe1\xa2" + "\x3c\xa1\x35\x08\xa9\x32\x43\xce" + "\x48\xc0\x45\xdc\x00\x7f\x26\xa2" + "\x1b\x3f\x5e\x0e\x9d\xf4\xc2\x0a", + .np = 4, + .tap = { 7, 7, 7, 7 } + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = "Test Using Large" + "r Than Block-Siz" + "e Key - Hash Key" + " First", + .psize = 54, + .digest = "\x0f\xc1\x95\x13\xbf\x6b\xd8\x78" + "\x03\x70\x16\x70\x6a\x0e\x57\xbc" + "\x52\x81\x39\x83\x6b\x9a\x42\xc3" + "\xd4\x19\xe4\x98\xe0\xe1\xfb\x96" + "\x16\xfd\x66\x91\x38\xd3\x3a\x11" + "\x05\xe0\x7c\x72\xb6\x95\x3b\xcc", + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = + "This is a test u" + "sing a larger th" + "an block-size ke" + "y and a larger t" + "han block-size d" + "ata. 
The key nee" + "ds to be hashed " + "before being use" + "d by the HMAC al" + "gorithm.", + .psize = 152, + .digest = "\x02\x6f\xdf\x6b\x50\x74\x1e\x37" + "\x38\x99\xc9\xf7\xd5\x40\x6d\x4e" + "\xb0\x9f\xc6\x66\x56\x36\xfc\x1a" + "\x53\x00\x29\xdd\xf5\xcf\x3c\xa5" + "\xa9\x00\xed\xce\x01\xf5\xf6\x1e" + "\x2f\x40\x8c\xdf\x2f\xd3\xe7\xe8", + }, +}; + +#define HMAC_SHA3_512_TEST_VECTORS 4 + +static struct hash_testvec hmac_sha3_512_tv_template[] = { + { + .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + .ksize = 20, + .plaintext = "Hi There", + .psize = 8, + .digest = "\xeb\x3f\xbd\x4b\x2e\xaa\xb8\xf5" + "\xc5\x04\xbd\x3a\x41\x46\x5a\xac" + "\xec\x15\x77\x0a\x7c\xab\xac\x53" + "\x1e\x48\x2f\x86\x0b\x5e\xc7\xba" + "\x47\xcc\xb2\xc6\xf2\xaf\xce\x8f" + "\x88\xd2\x2b\x6d\xc6\x13\x80\xf2" + "\x3a\x66\x8f\xd3\x88\x8b\xb8\x05" + "\x37\xc0\xa0\xb8\x64\x07\x68\x9e", + }, { + .key = "Jefe", + .ksize = 4, + .plaintext = "what do ya want for nothing?", + .psize = 28, + .digest = "\x5a\x4b\xfe\xab\x61\x66\x42\x7c" + "\x7a\x36\x47\xb7\x47\x29\x2b\x83" + "\x84\x53\x7c\xdb\x89\xaf\xb3\xbf" + "\x56\x65\xe4\xc5\xe7\x09\x35\x0b" + "\x28\x7b\xae\xc9\x21\xfd\x7c\xa0" + "\xee\x7a\x0c\x31\xd0\x22\xa9\x5e" + "\x1f\xc9\x2b\xa9\xd7\x7d\xf8\x83" + "\x96\x02\x75\xbe\xb4\xe6\x20\x24", + .np = 4, + .tap = { 7, 7, 7, 7 } + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = "Test Using Large" + "r Than Block-Siz" + "e Key - Hash Key" + " First", + .psize = 54, + .digest = "\x00\xf7\x51\xa9\xe5\x06\x95\xb0" + "\x90\xed\x69\x11\xa4\xb6\x55\x24" + "\x95\x1c\xdc\x15\xa7\x3a\x5d\x58" + "\xbb\x55\x21\x5e\xa2\xcd\x83\x9a" + "\xc7\x9d\x2b\x44\xa3\x9b\xaf\xab" + "\x27\xe8\x3f\xde\x9e\x11\xf6\x34" + "\x0b\x11\xd9\x91\xb1\xb9\x1b\xf2" + "\xee\xe7\xfc\x87\x24\x26\xc3\xa4", + }, { + .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + .ksize = 131, + .plaintext = + "This is a test u" + "sing a larger th" + "an block-size ke" + "y and a larger t" + "han block-size d" + "ata. 
The key nee" + "ds to be hashed " + "before being use" + "d by the HMAC al" + "gorithm.", + .psize = 152, + .digest = "\x38\xa4\x56\xa0\x04\xbd\x10\xd3" + "\x2c\x9a\xb8\x33\x66\x84\x11\x28" + "\x62\xc3\xdb\x61\xad\xcc\xa3\x18" + "\x29\x35\x5e\xaf\x46\xfd\x5c\x73" + "\xd0\x6a\x1f\x0d\x13\xfe\xc9\xa6" + "\x52\xfb\x38\x11\xb5\x77\xb1\xb1" + "\xd1\xb9\x78\x9f\x97\xae\x5b\x83" + "\xc6\xf4\x4d\xfc\xf1\xd6\x7e\xba", + }, +}; + /* * Poly1305 test vectors from RFC7539 A.3. */ diff --git a/drivers/Makefile b/drivers/Makefile index 6e7d458ac7d4..6b0ad573e0d7 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_GENERIC_PHY) += phy/ # GPIO must come after pinctrl as gpios may need to mux pins etc obj-$(CONFIG_PINCTRL) += pinctrl/ -obj-y += gpio/ +obj-$(CONFIG_GPIOLIB) += gpio/ obj-y += pwm/ obj-$(CONFIG_PCI) += pci/ obj-$(CONFIG_PARISC) += parisc/ @@ -128,7 +128,6 @@ obj-$(CONFIG_SGI_SN) += sn/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ -obj-$(CONFIG_ARCH_SHMOBILE) += sh/ ifndef CONFIG_ARCH_USES_GETTIMEOFFSET obj-y += clocksource/ endif diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index b7e2e776397d..acad70a0bb0d 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -213,6 +213,10 @@ config ACPI_CPU_FREQ_PSS bool select THERMAL +config ACPI_PROCESSOR_CSTATE + def_bool y + depends on IA64 || X86 + config ACPI_PROCESSOR_IDLE bool select CPU_IDLE @@ -234,7 +238,7 @@ config ACPI_CPPC_LIB config ACPI_PROCESSOR tristate "Processor" depends on X86 || IA64 || ARM64 - select ACPI_PROCESSOR_IDLE if X86 || IA64 + select ACPI_PROCESSOR_IDLE select ACPI_CPU_FREQ_PSS if X86 || IA64 default y help @@ -291,8 +295,8 @@ config ACPI_THERMAL config ACPI_NUMA bool "NUMA support" depends on NUMA - depends on (X86 || IA64) - default y if IA64_GENERIC || IA64_SGI_SN2 + depends on (X86 || IA64 || ARM64) + default y if IA64_GENERIC || IA64_SGI_SN2 || ARM64 config ACPI_CUSTOM_DSDT_FILE string "Custom DSDT Table file to include" @@ -311,9 +315,12 @@ config ACPI_CUSTOM_DSDT bool default ACPI_CUSTOM_DSDT_FILE != "" +config ARCH_HAS_ACPI_TABLE_UPGRADE + def_bool n + config ACPI_TABLE_UPGRADE bool "Allow upgrading ACPI tables via initrd" - depends on BLK_DEV_INITRD && X86 + depends on BLK_DEV_INITRD && ARCH_HAS_ACPI_TABLE_UPGRADE default y help This option provides functionality to upgrade arbitrary ACPI tables @@ -475,6 +482,7 @@ config ACPI_NFIT_DEBUG issue. source "drivers/acpi/apei/Kconfig" +source "drivers/acpi/dptf/Kconfig" config ACPI_EXTLOG tristate "Extended Error Log support" @@ -519,6 +527,20 @@ config XPOWER_PMIC_OPREGION help This config adds ACPI operation region support for XPower AXP288 PMIC. +config BXT_WC_PMIC_OPREGION + bool "ACPI operation region support for BXT WhiskeyCove PMIC" + depends on INTEL_SOC_PMIC + help + This config adds ACPI operation region support for BXT WhiskeyCove PMIC. + endif +config ACPI_CONFIGFS + tristate "ACPI configfs support" + select CONFIGFS_FS + help + Select this option to enable support for ACPI configuration from + userspace. The configurable ACPI groups will be visible under + /config/acpi, assuming configfs is mounted under /config. 
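For illustration only (not part of the merged series): a minimal user-space sketch of driving the new /config/acpi/table group that ACPI_CONFIGFS creates, assuming configfs is mounted at /config and "ssdt.aml" is a valid SSDT produced by an ASL compiler; the item name "my_ssdt" is arbitrary. The implementation being exercised is drivers/acpi/acpi_configfs.c further below: the table is validated and loaded only when the aml file is closed, so the result must be taken from close().

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/stat.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		int in, out;

		/* creating a directory instantiates a new table item */
		if (mkdir("/config/acpi/table/my_ssdt", 0755)) {
			perror("mkdir");
			return 1;
		}

		in = open("ssdt.aml", O_RDONLY);
		out = open("/config/acpi/table/my_ssdt/aml", O_WRONLY);
		if (in < 0 || out < 0)
			return 1;

		/* configfs buffers the writes; nothing is parsed yet */
		while ((n = read(in, buf, sizeof(buf))) > 0)
			if (write(out, buf, n) != n)
				return 1;

		close(in);
		/* the signature/length checks and acpi_load_table() run here */
		if (close(out)) {
			perror("SSDT load failed");
			return 1;
		}
		return 0;
	}

After a successful load, the read-only attributes next to aml (signature, length, revision, oem_id, oem_table_id, oem_revision, asl_compiler_id, asl_compiler_revision) report the header fields of the loaded table.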
+ endif # ACPI diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 251ce85a66fb..88f54f03e3d2 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -44,7 +44,6 @@ acpi-y += acpi_lpss.o acpi_apd.o acpi-y += acpi_platform.o acpi-y += acpi_pnp.o acpi-$(CONFIG_ARM_AMBA) += acpi_amba.o -acpi-y += int340x_thermal.o acpi-y += power.o acpi-y += event.o acpi-$(CONFIG_ACPI_REDUCED_HARDWARE_ONLY) += evged.o @@ -99,5 +98,9 @@ obj-$(CONFIG_ACPI_EXTLOG) += acpi_extlog.o obj-$(CONFIG_PMIC_OPREGION) += pmic/intel_pmic.o obj-$(CONFIG_CRC_PMIC_OPREGION) += pmic/intel_pmic_crc.o obj-$(CONFIG_XPOWER_PMIC_OPREGION) += pmic/intel_pmic_xpower.o +obj-$(CONFIG_BXT_WC_PMIC_OPREGION) += pmic/intel_pmic_bxtwc.o + +obj-$(CONFIG_ACPI_CONFIGFS) += acpi_configfs.o video-objs += acpi_video.o video_detect.o +obj-y += dptf/ diff --git a/drivers/acpi/acpi_configfs.c b/drivers/acpi/acpi_configfs.c new file mode 100644 index 000000000000..146a77fb762d --- /dev/null +++ b/drivers/acpi/acpi_configfs.c @@ -0,0 +1,267 @@ +/* + * ACPI configfs support + * + * Copyright (c) 2016 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#define pr_fmt(fmt) "ACPI configfs: " fmt + +#include +#include +#include +#include + +static struct config_group *acpi_table_group; + +struct acpi_table { + struct config_item cfg; + struct acpi_table_header *header; +}; + +static ssize_t acpi_table_aml_write(struct config_item *cfg, + const void *data, size_t size) +{ + const struct acpi_table_header *header = data; + struct acpi_table *table; + int ret; + + table = container_of(cfg, struct acpi_table, cfg); + + if (table->header) { + pr_err("table already loaded\n"); + return -EBUSY; + } + + if (header->length != size) { + pr_err("invalid table length\n"); + return -EINVAL; + } + + if (memcmp(header->signature, ACPI_SIG_SSDT, 4)) { + pr_err("invalid table signature\n"); + return -EINVAL; + } + + table = container_of(cfg, struct acpi_table, cfg); + + table->header = kmemdup(header, header->length, GFP_KERNEL); + if (!table->header) + return -ENOMEM; + + ret = acpi_load_table(table->header); + if (ret) { + kfree(table->header); + table->header = NULL; + } + + return ret; +} + +static inline struct acpi_table_header *get_header(struct config_item *cfg) +{ + struct acpi_table *table = container_of(cfg, struct acpi_table, cfg); + + if (!table->header) + pr_err("table not loaded\n"); + + return table->header; +} + +static ssize_t acpi_table_aml_read(struct config_item *cfg, + void *data, size_t size) +{ + struct acpi_table_header *h = get_header(cfg); + + if (!h) + return -EINVAL; + + if (data) + memcpy(data, h, h->length); + + return h->length; +} + +#define MAX_ACPI_TABLE_SIZE (128 * 1024) + +CONFIGFS_BIN_ATTR(acpi_table_, aml, NULL, MAX_ACPI_TABLE_SIZE); + +struct configfs_bin_attribute *acpi_table_bin_attrs[] = { + &acpi_table_attr_aml, + NULL, +}; + +ssize_t acpi_table_signature_show(struct config_item *cfg, char *str) +{ + struct acpi_table_header *h = get_header(cfg); + + if (!h) + return -EINVAL; + + return sprintf(str, "%.*s\n", ACPI_NAME_SIZE, h->signature); +} + +ssize_t acpi_table_length_show(struct config_item *cfg, char *str) +{ + struct acpi_table_header *h = get_header(cfg); + + if (!h) + return -EINVAL; + + return sprintf(str, "%d\n", h->length); +} + +ssize_t acpi_table_revision_show(struct config_item *cfg, char *str) +{ + struct acpi_table_header *h = 
get_header(cfg); + + if (!h) + return -EINVAL; + + return sprintf(str, "%d\n", h->revision); +} + +ssize_t acpi_table_oem_id_show(struct config_item *cfg, char *str) +{ + struct acpi_table_header *h = get_header(cfg); + + if (!h) + return -EINVAL; + + return sprintf(str, "%.*s\n", ACPI_OEM_ID_SIZE, h->oem_id); +} + +ssize_t acpi_table_oem_table_id_show(struct config_item *cfg, char *str) +{ + struct acpi_table_header *h = get_header(cfg); + + if (!h) + return -EINVAL; + + return sprintf(str, "%.*s\n", ACPI_OEM_TABLE_ID_SIZE, h->oem_table_id); +} + +ssize_t acpi_table_oem_revision_show(struct config_item *cfg, char *str) +{ + struct acpi_table_header *h = get_header(cfg); + + if (!h) + return -EINVAL; + + return sprintf(str, "%d\n", h->oem_revision); +} + +ssize_t acpi_table_asl_compiler_id_show(struct config_item *cfg, char *str) +{ + struct acpi_table_header *h = get_header(cfg); + + if (!h) + return -EINVAL; + + return sprintf(str, "%.*s\n", ACPI_NAME_SIZE, h->asl_compiler_id); +} + +ssize_t acpi_table_asl_compiler_revision_show(struct config_item *cfg, + char *str) +{ + struct acpi_table_header *h = get_header(cfg); + + if (!h) + return -EINVAL; + + return sprintf(str, "%d\n", h->asl_compiler_revision); +} + +CONFIGFS_ATTR_RO(acpi_table_, signature); +CONFIGFS_ATTR_RO(acpi_table_, length); +CONFIGFS_ATTR_RO(acpi_table_, revision); +CONFIGFS_ATTR_RO(acpi_table_, oem_id); +CONFIGFS_ATTR_RO(acpi_table_, oem_table_id); +CONFIGFS_ATTR_RO(acpi_table_, oem_revision); +CONFIGFS_ATTR_RO(acpi_table_, asl_compiler_id); +CONFIGFS_ATTR_RO(acpi_table_, asl_compiler_revision); + +struct configfs_attribute *acpi_table_attrs[] = { + &acpi_table_attr_signature, + &acpi_table_attr_length, + &acpi_table_attr_revision, + &acpi_table_attr_oem_id, + &acpi_table_attr_oem_table_id, + &acpi_table_attr_oem_revision, + &acpi_table_attr_asl_compiler_id, + &acpi_table_attr_asl_compiler_revision, + NULL, +}; + +static struct config_item_type acpi_table_type = { + .ct_owner = THIS_MODULE, + .ct_bin_attrs = acpi_table_bin_attrs, + .ct_attrs = acpi_table_attrs, +}; + +static struct config_item *acpi_table_make_item(struct config_group *group, + const char *name) +{ + struct acpi_table *table; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&table->cfg, name, &acpi_table_type); + return &table->cfg; +} + +struct configfs_group_operations acpi_table_group_ops = { + .make_item = acpi_table_make_item, +}; + +static struct config_item_type acpi_tables_type = { + .ct_owner = THIS_MODULE, + .ct_group_ops = &acpi_table_group_ops, +}; + +static struct config_item_type acpi_root_group_type = { + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem acpi_configfs = { + .su_group = { + .cg_item = { + .ci_namebuf = "acpi", + .ci_type = &acpi_root_group_type, + }, + }, + .su_mutex = __MUTEX_INITIALIZER(acpi_configfs.su_mutex), +}; + +static int __init acpi_configfs_init(void) +{ + int ret; + struct config_group *root = &acpi_configfs.su_group; + + config_group_init(root); + + ret = configfs_register_subsystem(&acpi_configfs); + if (ret) + return ret; + + acpi_table_group = configfs_register_default_group(root, "table", + &acpi_tables_type); + return PTR_ERR_OR_ZERO(acpi_table_group); +} +module_init(acpi_configfs_init); + +static void __exit acpi_configfs_exit(void) +{ + configfs_unregister_default_group(acpi_table_group); + configfs_unregister_subsystem(&acpi_configfs); +} +module_exit(acpi_configfs_exit); + +MODULE_AUTHOR("Octavian Purdila "); 
+MODULE_DESCRIPTION("ACPI configfs support"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/acpi/acpi_dbg.c b/drivers/acpi/acpi_dbg.c index 1f4128487dd4..dee86925a9a1 100644 --- a/drivers/acpi/acpi_dbg.c +++ b/drivers/acpi/acpi_dbg.c @@ -602,7 +602,7 @@ static int acpi_aml_read_user(char __user *buf, int len) crc->tail = (crc->tail + n) & (ACPI_AML_BUF_SIZE - 1); ret = n; out: - acpi_aml_unlock_fifo(ACPI_AML_OUT_USER, !ret); + acpi_aml_unlock_fifo(ACPI_AML_OUT_USER, ret >= 0); return ret; } @@ -672,7 +672,7 @@ static int acpi_aml_write_user(const char __user *buf, int len) crc->head = (crc->head + n) & (ACPI_AML_BUF_SIZE - 1); ret = n; out: - acpi_aml_unlock_fifo(ACPI_AML_IN_USER, !ret); + acpi_aml_unlock_fifo(ACPI_AML_IN_USER, ret >= 0); return n; } diff --git a/drivers/acpi/acpi_lpat.c b/drivers/acpi/acpi_lpat.c index feb61c1630eb..c1c4877ca96c 100644 --- a/drivers/acpi/acpi_lpat.c +++ b/drivers/acpi/acpi_lpat.c @@ -13,7 +13,7 @@ * GNU General Public License for more details. */ -#include +#include #include #include @@ -157,5 +157,3 @@ void acpi_lpat_free_conversion_table(struct acpi_lpat_conversion_table } } EXPORT_SYMBOL_GPL(acpi_lpat_free_conversion_table); - -MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 0872d5fecb82..357a0b8f860b 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -29,6 +29,7 @@ ACPI_MODULE_NAME("acpi_lpss"); #ifdef CONFIG_X86_INTEL_LPSS #include +#include #include #include @@ -229,8 +230,8 @@ static const struct lpss_device_desc bsw_spi_dev_desc = { #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } static const struct x86_cpu_id lpss_cpu_ids[] = { - ICPU(0x37), /* Valleyview, Bay Trail */ - ICPU(0x4c), /* Braswell, Cherry Trail */ + ICPU(INTEL_FAM6_ATOM_SILVERMONT1), /* Valleyview, Bay Trail */ + ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */ {} }; diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index c1d138e128cb..c5557d070954 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -1246,6 +1246,9 @@ static int acpi_video_device_enumerate(struct acpi_video_bus *video) union acpi_object *dod = NULL; union acpi_object *obj; + if (!video->cap._DOD) + return AE_NOT_EXIST; + status = acpi_evaluate_object(video->device->handle, "_DOD", NULL, &buffer); if (!ACPI_SUCCESS(status)) { ACPI_EXCEPTION((AE_INFO, status, "Evaluating _DOD")); diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile index 5d575a955940..e50573de25f1 100644 --- a/drivers/acpi/apei/Makefile +++ b/drivers/acpi/apei/Makefile @@ -3,4 +3,4 @@ obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o -apei-y := apei-base.o hest.o erst.o +apei-y := apei-base.o hest.o erst.o bert.o diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index 16129c78b489..6e9f14c0a71b 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -1,6 +1,6 @@ /* * apei-internal.h - ACPI Platform Error Interface internal - * definations. + * definitions. */ #ifndef APEI_INTERNAL_H diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c new file mode 100644 index 000000000000..a05b5c0cf181 --- /dev/null +++ b/drivers/acpi/apei/bert.c @@ -0,0 +1,150 @@ +/* + * APEI Boot Error Record Table (BERT) support + * + * Copyright 2011 Intel Corp. 
+ * Author: Huang Ying + * + * Under normal circumstances, when a hardware error occurs, the error + * handler receives control and processes the error. This gives OSPM a + * chance to process the error condition, report it, and optionally attempt + * recovery. In some cases, the system is unable to process an error. + * For example, system firmware or a management controller may choose to + * reset the system or the system might experience an uncontrolled crash + * or reset. The boot error source is used to report unhandled errors that + * occurred in a previous boot. This mechanism is described in the BERT + * table. + * + * For more information about BERT, please refer to ACPI Specification + * version 4.0, section 17.3.1 + * + * This file is licensed under GPLv2. + * + */ + +#include +#include +#include +#include +#include + +#include "apei-internal.h" + +#undef pr_fmt +#define pr_fmt(fmt) "BERT: " fmt + +static int bert_disable; + +static void __init bert_print_all(struct acpi_bert_region *region, + unsigned int region_len) +{ + struct acpi_hest_generic_status *estatus = + (struct acpi_hest_generic_status *)region; + int remain = region_len; + u32 estatus_len; + + if (!estatus->block_status) + return; + + while (remain > sizeof(struct acpi_bert_region)) { + if (cper_estatus_check(estatus)) { + pr_err(FW_BUG "Invalid error record.\n"); + return; + } + + estatus_len = cper_estatus_len(estatus); + if (remain < estatus_len) { + pr_err(FW_BUG "Truncated status block (length: %u).\n", + estatus_len); + return; + } + + pr_info_once("Error records from previous boot:\n"); + + cper_estatus_print(KERN_INFO HW_ERR, estatus); + + /* + * Because the boot error source is "one-time polled" type, + * clear Block Status of current Generic Error Status Block, + * once it's printed. + */ + estatus->block_status = 0; + + estatus = (void *)estatus + estatus_len; + /* No more error records.
*/ + if (!estatus->block_status) + return; + + remain -= estatus_len; + } +} + +static int __init setup_bert_disable(char *str) +{ + bert_disable = 1; + + return 0; +} +__setup("bert_disable", setup_bert_disable); + +static int __init bert_check_table(struct acpi_table_bert *bert_tab) +{ + if (bert_tab->header.length < sizeof(struct acpi_table_bert) || + bert_tab->region_length < sizeof(struct acpi_bert_region)) + return -EINVAL; + + return 0; +} + +static int __init bert_init(void) +{ + struct acpi_bert_region *boot_error_region; + struct acpi_table_bert *bert_tab; + unsigned int region_len; + acpi_status status; + int rc = 0; + + if (acpi_disabled) + return 0; + + if (bert_disable) { + pr_info("Boot Error Record Table support is disabled.\n"); + return 0; + } + + status = acpi_get_table(ACPI_SIG_BERT, 0, (struct acpi_table_header **)&bert_tab); + if (status == AE_NOT_FOUND) + return 0; + + if (ACPI_FAILURE(status)) { + pr_err("get table failed, %s.\n", acpi_format_exception(status)); + return -EINVAL; + } + + rc = bert_check_table(bert_tab); + if (rc) { + pr_err(FW_BUG "table invalid.\n"); + return rc; + } + + region_len = bert_tab->region_length; + if (!request_mem_region(bert_tab->address, region_len, "APEI BERT")) { + pr_err("Can't request iomem region <%016llx-%016llx>.\n", + (unsigned long long)bert_tab->address, + (unsigned long long)bert_tab->address + region_len - 1); + return -EIO; + } + + boot_error_region = ioremap_cache(bert_tab->address, region_len); + if (boot_error_region) { + bert_print_all(boot_error_region, region_len); + iounmap(boot_error_region); + } else { + rc = -ENOMEM; + } + + release_mem_region(bert_tab->address, region_len); + + return rc; +} + +late_initcall(bert_init); diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 559c1173de1c..eebb7e39c49c 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -33,7 +33,8 @@ #include "apei-internal.h" -#define EINJ_PFX "EINJ: " +#undef pr_fmt +#define pr_fmt(fmt) "EINJ: " fmt #define SPIN_UNIT 100 /* 100ns */ /* Firmware should respond within 1 milliseconds */ @@ -179,8 +180,7 @@ static int einj_get_available_error_type(u32 *type) static int einj_timedout(u64 *t) { if ((s64)*t < SPIN_UNIT) { - pr_warning(FW_WARN EINJ_PFX - "Firmware does not respond in time\n"); + pr_warning(FW_WARN "Firmware does not respond in time\n"); return 1; } *t -= SPIN_UNIT; @@ -307,8 +307,7 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type, r = request_mem_region(trigger_paddr, sizeof(*trigger_tab), "APEI EINJ Trigger Table"); if (!r) { - pr_err(EINJ_PFX - "Can not request [mem %#010llx-%#010llx] for Trigger table\n", + pr_err("Can not request [mem %#010llx-%#010llx] for Trigger table\n", (unsigned long long)trigger_paddr, (unsigned long long)trigger_paddr + sizeof(*trigger_tab) - 1); @@ -316,13 +315,12 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type, } trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab)); if (!trigger_tab) { - pr_err(EINJ_PFX "Failed to map trigger table!\n"); + pr_err("Failed to map trigger table!\n"); goto out_rel_header; } rc = einj_check_trigger_header(trigger_tab); if (rc) { - pr_warning(FW_BUG EINJ_PFX - "The trigger error action table is invalid\n"); + pr_warning(FW_BUG "Invalid trigger error action table.\n"); goto out_rel_header; } @@ -336,8 +334,7 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type, table_size - sizeof(*trigger_tab), "APEI EINJ Trigger Table"); if (!r) { - pr_err(EINJ_PFX -"Can not request [mem %#010llx-%#010llx] 
for Trigger Table Entry\n", + pr_err("Can not request [mem %#010llx-%#010llx] for Trigger Table Entry\n", (unsigned long long)trigger_paddr + sizeof(*trigger_tab), (unsigned long long)trigger_paddr + table_size - 1); goto out_rel_header; @@ -345,7 +342,7 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type, iounmap(trigger_tab); trigger_tab = ioremap_cache(trigger_paddr, table_size); if (!trigger_tab) { - pr_err(EINJ_PFX "Failed to map trigger table!\n"); + pr_err("Failed to map trigger table!\n"); goto out_rel_entry; } trigger_entry = (struct acpi_whea_header *) @@ -695,34 +692,42 @@ static int __init einj_init(void) struct dentry *fentry; struct apei_exec_context ctx; - if (acpi_disabled) + if (acpi_disabled) { + pr_warn("ACPI disabled.\n"); return -ENODEV; + } status = acpi_get_table(ACPI_SIG_EINJ, 0, (struct acpi_table_header **)&einj_tab); - if (status == AE_NOT_FOUND) + if (status == AE_NOT_FOUND) { + pr_warn("EINJ table not found.\n"); return -ENODEV; + } + else if (ACPI_FAILURE(status)) { - const char *msg = acpi_format_exception(status); - pr_err(EINJ_PFX "Failed to get table, %s\n", msg); + pr_err("Failed to get EINJ table: %s\n", + acpi_format_exception(status)); return -EINVAL; } rc = einj_check_table(einj_tab); if (rc) { - pr_warning(FW_BUG EINJ_PFX "EINJ table is invalid\n"); + pr_warn(FW_BUG "Invalid EINJ table.\n"); return -EINVAL; } rc = -ENOMEM; einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir()); - if (!einj_debug_dir) + if (!einj_debug_dir) { + pr_err("Error creating debugfs node.\n"); goto err_cleanup; + } + fentry = debugfs_create_file("available_error_type", S_IRUSR, einj_debug_dir, NULL, &available_error_type_fops); if (!fentry) goto err_cleanup; + fentry = debugfs_create_file("error_type", S_IRUSR | S_IWUSR, einj_debug_dir, NULL, &error_type_fops); if (!fentry) @@ -735,14 +740,22 @@ static int __init einj_init(void) apei_resources_init(&einj_resources); einj_exec_ctx_init(&ctx); rc = apei_exec_collect_resources(&ctx, &einj_resources); - if (rc) + if (rc) { + pr_err("Error collecting EINJ resources.\n"); goto err_fini; + } + rc = apei_resources_request(&einj_resources, "APEI EINJ"); - if (rc) + if (rc) { + pr_err("Error requesting memory/port resources.\n"); goto err_fini; + } + rc = apei_exec_pre_map_gars(&ctx); - if (rc) + if (rc) { + pr_err("Error pre-mapping GARs.\n"); goto err_release; + } rc = -ENOMEM; einj_param = einj_get_parameter_address(); @@ -787,7 +800,7 @@ static int __init einj_init(void) goto err_unmap; } - pr_info(EINJ_PFX "Error INJection is initialized.\n"); + pr_info("Error INJection is initialized.\n"); return 0; @@ -798,6 +811,7 @@ err_unmap: sizeof(struct einj_parameter); acpi_os_unmap_iomem(einj_param, size); + pr_err("Error creating param extension debugfs nodes.\n"); } apei_exec_post_unmap_gars(&ctx); err_release: @@ -805,6 +819,7 @@ err_release: err_fini: apei_resources_fini(&einj_resources); err_cleanup: + pr_err("Error creating primary debugfs nodes.\n"); debugfs_remove_recursive(einj_debug_dir); return rc; diff --git a/drivers/acpi/apei/erst.c index 006c3894c6ea..f096ab3cb54d 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -927,7 +927,8 @@ static int erst_open_pstore(struct pstore_info *psi); static int erst_close_pstore(struct pstore_info *psi); static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count, struct timespec *time, char **buf, - bool *compressed, struct pstore_info *psi); + bool *compressed, ssize_t *ecc_notice_size, + struct
pstore_info *psi); static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason, u64 *id, unsigned int part, int count, bool compressed, size_t size, struct pstore_info *psi); @@ -987,7 +988,8 @@ static int erst_close_pstore(struct pstore_info *psi) static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count, struct timespec *time, char **buf, - bool *compressed, struct pstore_info *psi) + bool *compressed, ssize_t *ecc_notice_size, + struct pstore_info *psi) { int rc; ssize_t len = 0; @@ -1033,6 +1035,7 @@ skip: memcpy(*buf, rcd->data, len - sizeof(*rcd)); *id = record_id; *compressed = false; + *ecc_notice_size = 0; if (uuid_le_cmp(rcd->sec_hdr.section_type, CPER_SECTION_TYPE_DMESG_Z) == 0) { *type = PSTORE_TYPE_DMESG; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 262ca31b86d9..85b7d07fe5c8 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -30,6 +30,9 @@ #include #include #include +#include +#include +#include #ifdef CONFIG_X86 #include #endif @@ -174,22 +177,17 @@ void acpi_bus_detach_private_data(acpi_handle handle) EXPORT_SYMBOL_GPL(acpi_bus_detach_private_data); static void acpi_print_osc_error(acpi_handle handle, - struct acpi_osc_context *context, char *error) + struct acpi_osc_context *context, char *error) { - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER}; int i; - if (ACPI_FAILURE(acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer))) - printk(KERN_DEBUG "%s: %s\n", context->uuid_str, error); - else { - printk(KERN_DEBUG "%s (%s): %s\n", - (char *)buffer.pointer, context->uuid_str, error); - kfree(buffer.pointer); - } - printk(KERN_DEBUG "_OSC request data:"); + acpi_handle_debug(handle, "(%s): %s\n", context->uuid_str, error); + + pr_debug("_OSC request data:"); for (i = 0; i < context->cap.length; i += sizeof(u32)) - printk(" %x", *((u32 *)(context->cap.pointer + i))); - printk("\n"); + pr_debug(" %x", *((u32 *)(context->cap.pointer + i))); + + pr_debug("\n"); } acpi_status acpi_str_to_uuid(char *str, u8 *uuid) @@ -302,6 +300,14 @@ out_kfree: EXPORT_SYMBOL(acpi_run_osc); bool osc_sb_apei_support_acked; + +/* + * ACPI 6.0 Section 8.4.4.2 Idle State Coordination + * OSPM supports platform coordinated low power idle(LPI) states + */ +bool osc_pc_lpi_support_confirmed; +EXPORT_SYMBOL_GPL(osc_pc_lpi_support_confirmed); + static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; static void acpi_bus_osc_support(void) { @@ -322,6 +328,7 @@ static void acpi_bus_osc_support(void) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PPC_OST_SUPPORT; capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_HOTPLUG_OST_SUPPORT; + capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PCLPI_SUPPORT; if (!ghes_disable) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_APEI_SUPPORT; @@ -329,9 +336,12 @@ static void acpi_bus_osc_support(void) return; if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) { u32 *capbuf_ret = context.ret.pointer; - if (context.ret.length > OSC_SUPPORT_DWORD) + if (context.ret.length > OSC_SUPPORT_DWORD) { osc_sb_apei_support_acked = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT; + osc_pc_lpi_support_confirmed = + capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT; + } kfree(context.ret.pointer); } /* do we need to check other returned cap? 
Sounds no */ @@ -475,6 +485,56 @@ static void acpi_device_remove_notify_handler(struct acpi_device *device) acpi_device_notify); } +/* Handle events targeting \_SB device (at present only graceful shutdown) */ + +#define ACPI_SB_NOTIFY_SHUTDOWN_REQUEST 0x81 +#define ACPI_SB_INDICATE_INTERVAL 10000 + +static void sb_notify_work(struct work_struct *dummy) +{ + acpi_handle sb_handle; + + orderly_poweroff(true); + + /* + * After initiating graceful shutdown, the ACPI spec requires OSPM + * to evaluate _OST method once every 10 seconds to indicate that + * the shutdown is in progress + */ + acpi_get_handle(NULL, "\\_SB", &sb_handle); + while (1) { + pr_info("Graceful shutdown in progress.\n"); + acpi_evaluate_ost(sb_handle, ACPI_OST_EC_OSPM_SHUTDOWN, + ACPI_OST_SC_OS_SHUTDOWN_IN_PROGRESS, NULL); + msleep(ACPI_SB_INDICATE_INTERVAL); + } +} + +static void acpi_sb_notify(acpi_handle handle, u32 event, void *data) +{ + static DECLARE_WORK(acpi_sb_work, sb_notify_work); + + if (event == ACPI_SB_NOTIFY_SHUTDOWN_REQUEST) { + if (!work_busy(&acpi_sb_work)) + schedule_work(&acpi_sb_work); + } else + pr_warn("event %x is not supported by \\_SB device\n", event); +} + +static int __init acpi_setup_sb_notify_handler(void) +{ + acpi_handle sb_handle; + + if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &sb_handle))) + return -ENXIO; + + if (ACPI_FAILURE(acpi_install_notify_handler(sb_handle, ACPI_DEVICE_NOTIFY, + acpi_sb_notify, NULL))) + return -EINVAL; + + return 0; +} + /* -------------------------------------------------------------------------- Device Matching -------------------------------------------------------------------------- */ @@ -961,8 +1021,7 @@ void __init acpi_early_init(void) /** * acpi_subsystem_init - Finalize the early initialization of ACPI. * - * Switch over the platform to the ACPI mode (if possible), initialize the - * handling of ACPI events, install the interrupt and global lock handlers. + * Switch over the platform to the ACPI mode (if possible). * * Doing this too early is generally unsafe, but at the same time it needs to be * done before all things that really depend on ACPI. The right spot appears to @@ -990,6 +1049,13 @@ void __init acpi_subsystem_init(void) } } +static acpi_status acpi_bus_table_handler(u32 event, void *table, void *context) +{ + acpi_scan_table_handler(event, table, context); + + return acpi_sysfs_table_handler(event, table, context); +} + static int __init acpi_bus_init(void) { int result; @@ -1043,6 +1109,8 @@ static int __init acpi_bus_init(void) * _PDC control method may load dynamic SSDT tables, * and we need to install the table handler before that.
*/ + status = acpi_install_table_handler(acpi_bus_table_handler, NULL); + acpi_sysfs_init(); acpi_early_processor_set_pdc(); @@ -1124,6 +1192,7 @@ static int __init acpi_init(void) acpi_sleep_proc_init(); acpi_wakeup_device_init(); acpi_debugger_init(); + acpi_setup_sb_notify_handler(); return 0; } diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 5c3b0918d5fd..148f4e5ca104 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -53,6 +53,10 @@ #define ACPI_BUTTON_DEVICE_NAME_LID "Lid Switch" #define ACPI_BUTTON_TYPE_LID 0x05 +#define ACPI_BUTTON_LID_INIT_IGNORE 0x00 +#define ACPI_BUTTON_LID_INIT_OPEN 0x01 +#define ACPI_BUTTON_LID_INIT_METHOD 0x02 + #define _COMPONENT ACPI_BUTTON_COMPONENT ACPI_MODULE_NAME("button"); @@ -105,6 +109,7 @@ struct acpi_button { static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier); static struct acpi_device *lid_device; +static u8 lid_init_state = ACPI_BUTTON_LID_INIT_METHOD; /* -------------------------------------------------------------------------- FS Interface (/proc) @@ -113,16 +118,52 @@ static struct acpi_device *lid_device; static struct proc_dir_entry *acpi_button_dir; static struct proc_dir_entry *acpi_lid_dir; +static int acpi_lid_evaluate_state(struct acpi_device *device) +{ + unsigned long long lid_state; + acpi_status status; + + status = acpi_evaluate_integer(device->handle, "_LID", NULL, &lid_state); + if (ACPI_FAILURE(status)) + return -ENODEV; + + return lid_state ? 1 : 0; +} + +static int acpi_lid_notify_state(struct acpi_device *device, int state) +{ + struct acpi_button *button = acpi_driver_data(device); + int ret; + + /* input layer checks if event is redundant */ + input_report_switch(button->input, SW_LID, !state); + input_sync(button->input); + + if (state) + pm_wakeup_event(&device->dev, 0); + + ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device); + if (ret == NOTIFY_DONE) + ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, + device); + if (ret == NOTIFY_DONE || ret == NOTIFY_OK) { + /* + * It is also regarded as success if the notifier_chain + * returns NOTIFY_OK or NOTIFY_DONE. + */ + ret = 0; + } + return ret; +} + static int acpi_button_state_seq_show(struct seq_file *seq, void *offset) { struct acpi_device *device = seq->private; - acpi_status status; - unsigned long long state; + int state; - status = acpi_evaluate_integer(device->handle, "_LID", NULL, &state); + state = acpi_lid_evaluate_state(device); seq_printf(seq, "state: %s\n", - ACPI_FAILURE(status) ? "unsupported" : - (state ? "open" : "closed")); + state < 0 ? "unsupported" : (state ? 
"open" : "closed")); return 0; } @@ -231,51 +272,37 @@ EXPORT_SYMBOL(acpi_lid_notifier_unregister); int acpi_lid_open(void) { - acpi_status status; - unsigned long long state; - if (!lid_device) return -ENODEV; - status = acpi_evaluate_integer(lid_device->handle, "_LID", NULL, - &state); - if (ACPI_FAILURE(status)) - return -ENODEV; - - return !!state; + return acpi_lid_evaluate_state(lid_device); } EXPORT_SYMBOL(acpi_lid_open); -static int acpi_lid_send_state(struct acpi_device *device) +static int acpi_lid_update_state(struct acpi_device *device) { - struct acpi_button *button = acpi_driver_data(device); - unsigned long long state; - acpi_status status; - int ret; - - status = acpi_evaluate_integer(device->handle, "_LID", NULL, &state); - if (ACPI_FAILURE(status)) - return -ENODEV; + int state; - /* input layer checks if event is redundant */ - input_report_switch(button->input, SW_LID, !state); - input_sync(button->input); + state = acpi_lid_evaluate_state(device); + if (state < 0) + return state; - if (state) - pm_wakeup_event(&device->dev, 0); + return acpi_lid_notify_state(device, state); +} - ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device); - if (ret == NOTIFY_DONE) - ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, - device); - if (ret == NOTIFY_DONE || ret == NOTIFY_OK) { - /* - * It is also regarded as success if the notifier_chain - * returns NOTIFY_OK or NOTIFY_DONE. - */ - ret = 0; +static void acpi_lid_initialize_state(struct acpi_device *device) +{ + switch (lid_init_state) { + case ACPI_BUTTON_LID_INIT_OPEN: + (void)acpi_lid_notify_state(device, 1); + break; + case ACPI_BUTTON_LID_INIT_METHOD: + (void)acpi_lid_update_state(device); + break; + case ACPI_BUTTON_LID_INIT_IGNORE: + default: + break; } - return ret; } static void acpi_button_notify(struct acpi_device *device, u32 event) @@ -290,7 +317,7 @@ static void acpi_button_notify(struct acpi_device *device, u32 event) case ACPI_BUTTON_NOTIFY_STATUS: input = button->input; if (button->type == ACPI_BUTTON_TYPE_LID) { - acpi_lid_send_state(device); + acpi_lid_update_state(device); } else { int keycode; @@ -335,7 +362,7 @@ static int acpi_button_resume(struct device *dev) button->suspended = false; if (button->type == ACPI_BUTTON_TYPE_LID) - return acpi_lid_send_state(device); + acpi_lid_initialize_state(device); return 0; } #endif @@ -416,7 +443,7 @@ static int acpi_button_add(struct acpi_device *device) if (error) goto err_remove_fs; if (button->type == ACPI_BUTTON_TYPE_LID) { - acpi_lid_send_state(device); + acpi_lid_initialize_state(device); /* * This assumes there's only one lid device, or if there are * more we only care about the last one... 
@@ -446,4 +473,42 @@ static int acpi_button_remove(struct acpi_device *device) return 0; } +static int param_set_lid_init_state(const char *val, struct kernel_param *kp) +{ + int result = 0; + + if (!strncmp(val, "open", sizeof("open") - 1)) { + lid_init_state = ACPI_BUTTON_LID_INIT_OPEN; + pr_info("Notify initial lid state as open\n"); + } else if (!strncmp(val, "method", sizeof("method") - 1)) { + lid_init_state = ACPI_BUTTON_LID_INIT_METHOD; + pr_info("Notify initial lid state with _LID return value\n"); + } else if (!strncmp(val, "ignore", sizeof("ignore") - 1)) { + lid_init_state = ACPI_BUTTON_LID_INIT_IGNORE; + pr_info("Do not notify initial lid state\n"); + } else + result = -EINVAL; + return result; +} + +static int param_get_lid_init_state(char *buffer, struct kernel_param *kp) +{ + switch (lid_init_state) { + case ACPI_BUTTON_LID_INIT_OPEN: + return sprintf(buffer, "open"); + case ACPI_BUTTON_LID_INIT_METHOD: + return sprintf(buffer, "method"); + case ACPI_BUTTON_LID_INIT_IGNORE: + return sprintf(buffer, "ignore"); + default: + return sprintf(buffer, "invalid"); + } + return 0; +} + +module_param_call(lid_init_state, + param_set_lid_init_state, param_get_lid_init_state, + NULL, 0644); +MODULE_PARM_DESC(lid_init_state, "Behavior for reporting LID initial state"); + module_acpi_driver(acpi_button_driver); diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 8adac69dba3d..2e981732805b 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -299,8 +299,10 @@ int acpi_get_psd_map(struct cpudata **all_cpu_data) continue; cpc_ptr = per_cpu(cpc_desc_ptr, i); - if (!cpc_ptr) - continue; + if (!cpc_ptr) { + retval = -EFAULT; + goto err_ret; + } pdomain = &(cpc_ptr->domain_info); cpumask_set_cpu(i, pr->shared_cpu_map); @@ -322,8 +324,10 @@ int acpi_get_psd_map(struct cpudata **all_cpu_data) continue; match_cpc_ptr = per_cpu(cpc_desc_ptr, j); - if (!match_cpc_ptr) - continue; + if (!match_cpc_ptr) { + retval = -EFAULT; + goto err_ret; + } match_pdomain = &(match_cpc_ptr->domain_info); if (match_pdomain->domain != pdomain->domain) @@ -353,8 +357,10 @@ int acpi_get_psd_map(struct cpudata **all_cpu_data) continue; match_cpc_ptr = per_cpu(cpc_desc_ptr, j); - if (!match_cpc_ptr) - continue; + if (!match_cpc_ptr) { + retval = -EFAULT; + goto err_ret; + } match_pdomain = &(match_cpc_ptr->domain_info); if (match_pdomain->domain != pdomain->domain) @@ -595,9 +601,6 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) /* Store CPU Logical ID */ cpc_ptr->cpu_id = pr->id; - /* Plug it into this CPUs CPC descriptor. */ - per_cpu(cpc_desc_ptr, pr->id) = cpc_ptr; - /* Parse PSD data for this CPU */ ret = acpi_get_psd(cpc_ptr, handle); if (ret) @@ -610,6 +613,9 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) goto out_free; } + /* Plug PSD data into this CPUs CPC descriptor. 
*/ + per_cpu(cpc_desc_ptr, pr->id) = cpc_ptr; + /* Everything looks okay */ pr_debug("Parsed CPC struct for CPU: %d\n", pr->id); diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index e8e128dede29..0c00208b423e 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -21,7 +21,7 @@ */ #include -#include +#include #include #include #include @@ -33,12 +33,7 @@ #include "internal.h" -#define ACPI_DOCK_DRIVER_DESCRIPTION "ACPI Dock Station Driver" - ACPI_MODULE_NAME("dock"); -MODULE_AUTHOR("Kristen Carlson Accardi"); -MODULE_DESCRIPTION(ACPI_DOCK_DRIVER_DESCRIPTION); -MODULE_LICENSE("GPL"); static bool immediate_undock = 1; module_param(immediate_undock, bool, 0644); diff --git a/drivers/acpi/dptf/Kconfig b/drivers/acpi/dptf/Kconfig new file mode 100644 index 000000000000..ac0a6ed0cf46 --- /dev/null +++ b/drivers/acpi/dptf/Kconfig @@ -0,0 +1,15 @@ +config DPTF_POWER + tristate "DPTF Platform Power Participant" + depends on X86 + help + This driver adds support for Dynamic Platform and Thermal Framework + (DPTF) Platform Power Participant device (INT3407) support. + This participant is responsible for exposing platform telemetry: + max_platform_power + platform_power_source + adapter_rating + battery_steady_power + charger_type + + To compile this driver as a module, choose M here: + the module will be called dptf_power. diff --git a/drivers/acpi/dptf/Makefile b/drivers/acpi/dptf/Makefile new file mode 100644 index 000000000000..06ea8809583d --- /dev/null +++ b/drivers/acpi/dptf/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_ACPI) += int340x_thermal.o +obj-$(CONFIG_DPTF_POWER) += dptf_power.o + +ccflags-y += -Idrivers/acpi diff --git a/drivers/acpi/dptf/dptf_power.c b/drivers/acpi/dptf/dptf_power.c new file mode 100644 index 000000000000..734642dc5008 --- /dev/null +++ b/drivers/acpi/dptf/dptf_power.c @@ -0,0 +1,128 @@ +/* + * dptf_power: DPTF platform power driver + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include + +/* + * Presentation of attributes which are defined for INT3407. 
They are: + * PMAX : Maximum platform power + * PSRC : Platform power source + * ARTG : Adapter rating + * CTYP : Charger type + * PBSS : Battery steady power + */ +#define DPTF_POWER_SHOW(name, object) \ +static ssize_t name##_show(struct device *dev,\ + struct device_attribute *attr,\ + char *buf)\ +{\ + struct platform_device *pdev = to_platform_device(dev);\ + struct acpi_device *acpi_dev = platform_get_drvdata(pdev);\ + unsigned long long val;\ + acpi_status status;\ +\ + status = acpi_evaluate_integer(acpi_dev->handle, #object,\ + NULL, &val);\ + if (ACPI_SUCCESS(status))\ + return sprintf(buf, "%d\n", (int)val);\ + else \ + return -EINVAL;\ +} + +DPTF_POWER_SHOW(max_platform_power_mw, PMAX) +DPTF_POWER_SHOW(platform_power_source, PSRC) +DPTF_POWER_SHOW(adapter_rating_mw, ARTG) +DPTF_POWER_SHOW(battery_steady_power_mw, PBSS) +DPTF_POWER_SHOW(charger_type, CTYP) + +static DEVICE_ATTR_RO(max_platform_power_mw); +static DEVICE_ATTR_RO(platform_power_source); +static DEVICE_ATTR_RO(adapter_rating_mw); +static DEVICE_ATTR_RO(battery_steady_power_mw); +static DEVICE_ATTR_RO(charger_type); + +static struct attribute *dptf_power_attrs[] = { + &dev_attr_max_platform_power_mw.attr, + &dev_attr_platform_power_source.attr, + &dev_attr_adapter_rating_mw.attr, + &dev_attr_battery_steady_power_mw.attr, + &dev_attr_charger_type.attr, + NULL +}; + +static struct attribute_group dptf_power_attribute_group = { + .attrs = dptf_power_attrs, + .name = "dptf_power" +}; + +static int dptf_power_add(struct platform_device *pdev) +{ + struct acpi_device *acpi_dev; + acpi_status status; + unsigned long long ptype; + int result; + + acpi_dev = ACPI_COMPANION(&(pdev->dev)); + if (!acpi_dev) + return -ENODEV; + + status = acpi_evaluate_integer(acpi_dev->handle, "PTYP", NULL, &ptype); + if (ACPI_FAILURE(status)) + return -ENODEV; + + if (ptype != 0x11) + return -ENODEV; + + result = sysfs_create_group(&pdev->dev.kobj, + &dptf_power_attribute_group); + if (result) + return result; + + platform_set_drvdata(pdev, acpi_dev); + + return 0; +} + +static int dptf_power_remove(struct platform_device *pdev) +{ + + sysfs_remove_group(&pdev->dev.kobj, &dptf_power_attribute_group); + + return 0; +} + +static const struct acpi_device_id int3407_device_ids[] = { + {"INT3407", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, int3407_device_ids); + +static struct platform_driver dptf_power_driver = { + .probe = dptf_power_add, + .remove = dptf_power_remove, + .driver = { + .name = "DPTF Platform Power", + .acpi_match_table = int3407_device_ids, + }, +}; + +module_platform_driver(dptf_power_driver); + +MODULE_AUTHOR("Srinivas Pandruvada "); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ACPI DPTF platform power driver"); diff --git a/drivers/acpi/int340x_thermal.c b/drivers/acpi/dptf/int340x_thermal.c similarity index 100% rename from drivers/acpi/int340x_thermal.c rename to drivers/acpi/dptf/int340x_thermal.c diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 73c76d646064..999a10914678 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1331,8 +1331,6 @@ static int ec_install_handlers(struct acpi_ec *ec) static void ec_remove_handlers(struct acpi_ec *ec) { - acpi_ec_stop(ec, false); - if (test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) { if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle, ACPI_ADR_SPACE_EC, &acpi_ec_space_handler))) @@ -1340,6 +1338,19 @@ static void ec_remove_handlers(struct acpi_ec *ec) clear_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags); } + /* + * Stops handling the EC transactions
after removing the operation + * region handler. This is required because _REG(DISCONNECT) + * invoked during the removal can result in new EC transactions. + * + * Flushes the EC requests and thus disables the GPE before + * removing the GPE handler. This is required by the current ACPICA + * GPE core. ACPICA GPE core will automatically disable a GPE when + * it is indicated but there is no way to handle it. So the drivers + * must disable the GPEs prior to removing the GPE handlers. + */ + acpi_ec_stop(ec, false); + if (test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags)) { if (ACPI_FAILURE(acpi_remove_gpe_handler(NULL, ec->gpe, &acpi_ec_gpe_handler))) @@ -1348,13 +1359,9 @@ static void ec_remove_handlers(struct acpi_ec *ec) } } -static int acpi_ec_add(struct acpi_device *device) +static struct acpi_ec *acpi_ec_alloc(void) { - struct acpi_ec *ec = NULL; - int ret; - - strcpy(acpi_device_name(device), ACPI_EC_DEVICE_NAME); - strcpy(acpi_device_class(device), ACPI_EC_CLASS); + struct acpi_ec *ec; /* Check for boot EC */ if (boot_ec) { @@ -1365,9 +1372,21 @@ static int acpi_ec_add(struct acpi_device *device) first_ec = NULL; } else { ec = make_acpi_ec(); - if (!ec) - return -ENOMEM; } + return ec; +} + +static int acpi_ec_add(struct acpi_device *device) +{ + struct acpi_ec *ec = NULL; + int ret; + + strcpy(acpi_device_name(device), ACPI_EC_DEVICE_NAME); + strcpy(acpi_device_class(device), ACPI_EC_CLASS); + + ec = acpi_ec_alloc(); + if (!ec) + return -ENOMEM; if (ec_parse_device(device->handle, 0, ec, NULL) != AE_CTRL_TERMINATE) { kfree(ec); @@ -1454,27 +1473,31 @@ static const struct acpi_device_id ec_device_ids[] = { int __init acpi_ec_dsdt_probe(void) { acpi_status status; + struct acpi_ec *ec; + int ret; - if (boot_ec) - return 0; - + ec = acpi_ec_alloc(); + if (!ec) + return -ENOMEM; /* * Finding EC from DSDT if there is no ECDT EC available. When this * function is invoked, ACPI tables have been fully loaded, we can * walk namespace now. */ - boot_ec = make_acpi_ec(); - if (!boot_ec) - return -ENOMEM; status = acpi_get_devices(ec_device_ids[0].id, - ec_parse_device, boot_ec, NULL); - if (ACPI_FAILURE(status) || !boot_ec->handle) - return -ENODEV; - if (!ec_install_handlers(boot_ec)) { - first_ec = boot_ec; - return 0; + ec_parse_device, ec, NULL); + if (ACPI_FAILURE(status) || !ec->handle) { + ret = -ENODEV; + goto error; } - return -EFAULT; + ret = ec_install_handlers(ec); + +error: + if (ret) + kfree(ec); + else + first_ec = boot_ec = ec; + return ret; } #if 0 @@ -1518,6 +1541,11 @@ static int ec_clear_on_resume(const struct dmi_system_id *id) return 0; } +/* + * Some ECDTs contain wrong register addresses. 
+ * MSI MS-171F + * https://bugzilla.kernel.org/show_bug.cgi?id=12461 + */ static int ec_correct_ecdt(const struct dmi_system_id *id) { pr_debug("Detected system needing ECDT address correction.\n"); @@ -1526,16 +1554,6 @@ static int ec_correct_ecdt(const struct dmi_system_id *id) } static struct dmi_system_id ec_dmi_table[] __initdata = { - { - ec_correct_ecdt, "Asus L4R", { - DMI_MATCH(DMI_BIOS_VERSION, "1008.006"), - DMI_MATCH(DMI_PRODUCT_NAME, "L4R"), - DMI_MATCH(DMI_BOARD_NAME, "L4R") }, NULL}, - { - ec_correct_ecdt, "Asus M6R", { - DMI_MATCH(DMI_BIOS_VERSION, "0207"), - DMI_MATCH(DMI_PRODUCT_NAME, "M6R"), - DMI_MATCH(DMI_BOARD_NAME, "M6R") }, NULL}, { ec_correct_ecdt, "MSI MS-171F", { DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star"), @@ -1548,12 +1566,13 @@ static struct dmi_system_id ec_dmi_table[] __initdata = { int __init acpi_ec_ecdt_probe(void) { - int ret = 0; + int ret; acpi_status status; struct acpi_table_ecdt *ecdt_ptr; + struct acpi_ec *ec; - boot_ec = make_acpi_ec(); - if (!boot_ec) + ec = acpi_ec_alloc(); + if (!ec) return -ENOMEM; /* * Generate a boot ec context @@ -1577,28 +1596,20 @@ int __init acpi_ec_ecdt_probe(void) pr_info("EC description table is found, configuring boot EC\n"); if (EC_FLAGS_CORRECT_ECDT) { - /* - * Asus L4R, Asus M6R - * https://bugzilla.kernel.org/show_bug.cgi?id=9399 - * MSI MS-171F - * https://bugzilla.kernel.org/show_bug.cgi?id=12461 - */ - boot_ec->command_addr = ecdt_ptr->data.address; - boot_ec->data_addr = ecdt_ptr->control.address; + ec->command_addr = ecdt_ptr->data.address; + ec->data_addr = ecdt_ptr->control.address; } else { - boot_ec->command_addr = ecdt_ptr->control.address; - boot_ec->data_addr = ecdt_ptr->data.address; + ec->command_addr = ecdt_ptr->control.address; + ec->data_addr = ecdt_ptr->data.address; } - boot_ec->gpe = ecdt_ptr->gpe; - boot_ec->handle = ACPI_ROOT_OBJECT; - ret = ec_install_handlers(boot_ec); - if (!ret) - first_ec = boot_ec; + ec->gpe = ecdt_ptr->gpe; + ec->handle = ACPI_ROOT_OBJECT; + ret = ec_install_handlers(ec); error: - if (ret) { - kfree(boot_ec); - boot_ec = NULL; - } + if (ret) + kfree(ec); + else + first_ec = boot_ec = ec; return ret; } diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 27cc7feabfe4..940218ff0193 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -87,6 +87,9 @@ bool acpi_queue_hotplug_work(struct work_struct *work); void acpi_device_hotplug(struct acpi_device *adev, u32 src); bool acpi_scan_is_offline(struct acpi_device *adev, bool uevent); +acpi_status acpi_sysfs_table_handler(u32 event, void *table, void *context); +void acpi_scan_table_handler(u32 event, void *table, void *context); + /* -------------------------------------------------------------------------- Device Node Initialization / Removal -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 2215fc847fa9..1f0e06065ae6 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -928,7 +928,7 @@ static ssize_t format_show(struct device *dev, { struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); - return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->code)); + return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code)); } static DEVICE_ATTR_RO(format); @@ -961,8 +961,8 @@ static ssize_t format1_show(struct device *dev, continue; if (nfit_dcr->dcr->code == dcr->code) continue; - rc = sprintf(buf, "%#x\n", - be16_to_cpu(nfit_dcr->dcr->code)); + rc = sprintf(buf, "0x%04x\n", + le16_to_cpu(nfit_dcr->dcr->code)); 
break; } if (rc != ENXIO) @@ -1151,9 +1151,10 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, if (disable_vendor_specific) dsm_mask &= ~(1 << 8); } else { - dev_err(dev, "unknown dimm command family\n"); + dev_dbg(dev, "unknown dimm command family\n"); nfit_mem->family = -1; - return force_enable_dimms ? 0 : -ENODEV; + /* DSMs are optional, continue loading the driver... */ + return 0; } uuid = to_nfit_uuid(nfit_mem->family); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 11cb38348aef..02b9ea1e8d2e 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -53,12 +53,12 @@ enum nfit_uuids { }; /* - * Region format interface codes are stored as an array of bytes in the - * NFIT DIMM Control Region structure + * Region format interface codes are stored with the interface as the + * LSB and the function as the MSB. */ -#define NFIT_FIC_BYTE cpu_to_be16(0x101) /* byte-addressable energy backed */ -#define NFIT_FIC_BLK cpu_to_be16(0x201) /* block-addressable non-energy backed */ -#define NFIT_FIC_BYTEN cpu_to_be16(0x301) /* byte-addressable non-energy backed */ +#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */ +#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */ +#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */ enum { NFIT_BLK_READ_FLUSH = 1, diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index d176e0ece470..ce3a7a16f03f 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -18,22 +18,21 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * */ + +#define pr_fmt(fmt) "ACPI: " fmt + #include #include #include #include #include #include +#include +#include #include #include #include -#define PREFIX "ACPI: " - -#define ACPI_NUMA 0x80000000 -#define _COMPONENT ACPI_NUMA -ACPI_MODULE_NAME("numa"); - static nodemask_t nodes_found_map = NODE_MASK_NONE; /* maps to convert between proximity domain and logical node ID */ @@ -43,6 +42,7 @@ static int node_to_pxm_map[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL }; unsigned char acpi_srat_revision __initdata; +int acpi_numa __initdata; int pxm_to_node(int pxm) { @@ -128,68 +128,63 @@ EXPORT_SYMBOL(acpi_map_pxm_to_online_node); static void __init acpi_table_print_srat_entry(struct acpi_subtable_header *header) { - - ACPI_FUNCTION_NAME("acpi_table_print_srat_entry"); - - if (!header) - return; - switch (header->type) { - case ACPI_SRAT_TYPE_CPU_AFFINITY: -#ifdef ACPI_DEBUG_OUTPUT { struct acpi_srat_cpu_affinity *p = (struct acpi_srat_cpu_affinity *)header; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n", - p->apic_id, p->local_sapic_eid, - p->proximity_domain_lo, - (p->flags & ACPI_SRAT_CPU_ENABLED)? - "enabled" : "disabled")); + pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n", + p->apic_id, p->local_sapic_eid, + p->proximity_domain_lo, + (p->flags & ACPI_SRAT_CPU_ENABLED) ? + "enabled" : "disabled"); } -#endif /* ACPI_DEBUG_OUTPUT */ break; case ACPI_SRAT_TYPE_MEMORY_AFFINITY: -#ifdef ACPI_DEBUG_OUTPUT { struct acpi_srat_mem_affinity *p = (struct acpi_srat_mem_affinity *)header; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n", - (unsigned long)p->base_address, - (unsigned long)p->length, - p->proximity_domain, - (p->flags & ACPI_SRAT_MEM_ENABLED)? - "enabled" : "disabled", - (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)? 
- " hot-pluggable" : "", - (p->flags & ACPI_SRAT_MEM_NON_VOLATILE)? - " non-volatile" : "")); + pr_debug("SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n", + (unsigned long)p->base_address, + (unsigned long)p->length, + p->proximity_domain, + (p->flags & ACPI_SRAT_MEM_ENABLED) ? + "enabled" : "disabled", + (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? + " hot-pluggable" : "", + (p->flags & ACPI_SRAT_MEM_NON_VOLATILE) ? + " non-volatile" : ""); } -#endif /* ACPI_DEBUG_OUTPUT */ break; case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: -#ifdef ACPI_DEBUG_OUTPUT { struct acpi_srat_x2apic_cpu_affinity *p = (struct acpi_srat_x2apic_cpu_affinity *)header; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "SRAT Processor (x2apicid[0x%08x]) in" - " proximity domain %d %s\n", - p->apic_id, - p->proximity_domain, - (p->flags & ACPI_SRAT_CPU_ENABLED) ? - "enabled" : "disabled")); + pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n", + p->apic_id, + p->proximity_domain, + (p->flags & ACPI_SRAT_CPU_ENABLED) ? + "enabled" : "disabled"); } -#endif /* ACPI_DEBUG_OUTPUT */ break; + + case ACPI_SRAT_TYPE_GICC_AFFINITY: + { + struct acpi_srat_gicc_affinity *p = + (struct acpi_srat_gicc_affinity *)header; + pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n", + p->acpi_processor_uid, + p->proximity_domain, + (p->flags & ACPI_SRAT_GICC_ENABLED) ? + "enabled" : "disabled"); + } + break; + default: - printk(KERN_WARNING PREFIX - "Found unsupported SRAT entry (type = 0x%x)\n", - header->type); + pr_warn("Found unsupported SRAT entry (type = 0x%x)\n", + header->type); break; } } @@ -217,12 +212,117 @@ static int __init slit_valid(struct acpi_table_slit *slit) return 1; } +void __init bad_srat(void) +{ + pr_err("SRAT: SRAT not used.\n"); + acpi_numa = -1; +} + +int __init srat_disabled(void) +{ + return acpi_numa < 0; +} + +#if defined(CONFIG_X86) || defined(CONFIG_ARM64) +/* + * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for + * I/O localities since SRAT does not list them. I/O localities are + * not supported at this point. 
+ */ +void __init acpi_numa_slit_init(struct acpi_table_slit *slit) +{ + int i, j; + + for (i = 0; i < slit->locality_count; i++) { + const int from_node = pxm_to_node(i); + + if (from_node == NUMA_NO_NODE) + continue; + + for (j = 0; j < slit->locality_count; j++) { + const int to_node = pxm_to_node(j); + + if (to_node == NUMA_NO_NODE) + continue; + + numa_set_distance(from_node, to_node, + slit->entry[slit->locality_count * i + j]); + } + } +} + +/* + * Default callback for parsing of the Proximity Domain <-> Memory + * Area mappings + */ +int __init +acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) +{ + u64 start, end; + u32 hotpluggable; + int node, pxm; + + if (srat_disabled()) + goto out_err; + if (ma->header.length < sizeof(struct acpi_srat_mem_affinity)) { + pr_err("SRAT: Unexpected header length: %d\n", + ma->header.length); + goto out_err_bad_srat; + } + if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) + goto out_err; + hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; + if (hotpluggable && !IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) + goto out_err; + + start = ma->base_address; + end = start + ma->length; + pxm = ma->proximity_domain; + if (acpi_srat_revision <= 1) + pxm &= 0xff; + + node = acpi_map_pxm_to_node(pxm); + if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { + pr_err("SRAT: Too many proximity domains.\n"); + goto out_err_bad_srat; + } + + if (numa_add_memblk(node, start, end) < 0) { + pr_err("SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n", + node, (unsigned long long) start, + (unsigned long long) end - 1); + goto out_err_bad_srat; + } + + node_set(node, numa_nodes_parsed); + + pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n", + node, pxm, + (unsigned long long) start, (unsigned long long) end - 1, + hotpluggable ? " hotplug" : "", + ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : ""); + + /* Mark hotplug range in memblock. */ + if (hotpluggable && memblock_mark_hotplug(start, ma->length)) + pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n", + (unsigned long long)start, (unsigned long long)end - 1); + + max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1)); + + return 0; +out_err_bad_srat: + bad_srat(); +out_err: + return -EINVAL; +} +#endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */ + static int __init acpi_parse_slit(struct acpi_table_header *table) { struct acpi_table_slit *slit = (struct acpi_table_slit *)table; if (!slit_valid(slit)) { - printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n"); + pr_info("SLIT table looks invalid. 
Not used.\n"); return -EINVAL; } acpi_numa_slit_init(slit); @@ -233,12 +333,9 @@ static int __init acpi_parse_slit(struct acpi_table_header *table) void __init __weak acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) { - printk(KERN_WARNING PREFIX - "Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id); - return; + pr_warn("Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id); } - static int __init acpi_parse_x2apic_affinity(struct acpi_subtable_header *header, const unsigned long end) @@ -275,6 +372,24 @@ acpi_parse_processor_affinity(struct acpi_subtable_header *header, return 0; } +static int __init +acpi_parse_gicc_affinity(struct acpi_subtable_header *header, + const unsigned long end) +{ + struct acpi_srat_gicc_affinity *processor_affinity; + + processor_affinity = (struct acpi_srat_gicc_affinity *)header; + if (!processor_affinity) + return -EINVAL; + + acpi_table_print_srat_entry(header); + + /* let architecture-dependent part to do it */ + acpi_numa_gicc_affinity_init(processor_affinity); + + return 0; +} + static int __initdata parsed_numa_memblks; static int __init @@ -319,6 +434,9 @@ int __init acpi_numa_init(void) { int cnt = 0; + if (acpi_disabled) + return -EINVAL; + /* * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= * SRAT cpu entries could have different order with that in MADT. @@ -327,13 +445,15 @@ int __init acpi_numa_init(void) /* SRAT: Static Resource Affinity Table */ if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { - struct acpi_subtable_proc srat_proc[2]; + struct acpi_subtable_proc srat_proc[3]; memset(srat_proc, 0, sizeof(srat_proc)); srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY; srat_proc[0].handler = acpi_parse_processor_affinity; srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY; srat_proc[1].handler = acpi_parse_x2apic_affinity; + srat_proc[2].id = ACPI_SRAT_TYPE_GICC_AFFINITY; + srat_proc[2].handler = acpi_parse_gicc_affinity; acpi_table_parse_entries_array(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), @@ -347,8 +467,6 @@ int __init acpi_numa_init(void) /* SLIT: System Locality Information Table */ acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); - acpi_numa_arch_fixup(); - if (cnt < 0) return cnt; else if (!parsed_numa_memblks) diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 8fc7323ed3e8..c983bf733ad3 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -470,6 +470,7 @@ static int acpi_irq_pci_sharing_penalty(int irq) { struct acpi_pci_link *link; int penalty = 0; + int i; list_for_each_entry(link, &acpi_link_list, list) { /* @@ -478,18 +479,14 @@ static int acpi_irq_pci_sharing_penalty(int irq) */ if (link->irq.active && link->irq.active == irq) penalty += PIRQ_PENALTY_PCI_USING; - else { - int i; - - /* - * If a link is inactive, penalize the IRQs it - * might use, but not as severely. - */ - for (i = 0; i < link->irq.possible_count; i++) - if (link->irq.possible[i] == irq) - penalty += PIRQ_PENALTY_PCI_POSSIBLE / - link->irq.possible_count; - } + + /* + * penalize the IRQs PCI might use, but not as severely. + */ + for (i = 0; i < link->irq.possible_count; i++) + if (link->irq.possible[i] == irq) + penalty += PIRQ_PENALTY_PCI_POSSIBLE / + link->irq.possible_count; } return penalty; @@ -499,9 +496,6 @@ static int acpi_irq_get_penalty(int irq) { int penalty = 0; - if (irq < ACPI_MAX_ISA_IRQS) - penalty += acpi_isa_irq_penalty[irq]; - /* * Penalize IRQ used by ACPI SCI. 
If ACPI SCI pin attributes conflict * with PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be @@ -516,10 +510,49 @@ static int acpi_irq_get_penalty(int irq) penalty += PIRQ_PENALTY_PCI_USING; } + if (irq < ACPI_MAX_ISA_IRQS) + return penalty + acpi_isa_irq_penalty[irq]; + penalty += acpi_irq_pci_sharing_penalty(irq); return penalty; } +int __init acpi_irq_penalty_init(void) +{ + struct acpi_pci_link *link; + int i; + + /* + * Update penalties to facilitate IRQ balancing. + */ + list_for_each_entry(link, &acpi_link_list, list) { + + /* + * reflect the possible and active irqs in the penalty table -- + * useful for breaking ties. + */ + if (link->irq.possible_count) { + int penalty = + PIRQ_PENALTY_PCI_POSSIBLE / + link->irq.possible_count; + + for (i = 0; i < link->irq.possible_count; i++) { + if (link->irq.possible[i] < ACPI_MAX_ISA_IRQS) + acpi_isa_irq_penalty[link->irq. + possible[i]] += + penalty; + } + + } else if (link->irq.active && + (link->irq.active < ACPI_MAX_ISA_IRQS)) { + acpi_isa_irq_penalty[link->irq.active] += + PIRQ_PENALTY_PCI_POSSIBLE; + } + } + + return 0; +} + static int acpi_irq_balance = -1; /* 0: static, 1: balance */ static int acpi_pci_link_allocate(struct acpi_pci_link *link) @@ -839,7 +872,7 @@ void acpi_penalize_isa_irq(int irq, int active) { if ((irq >= 0) && (irq < ARRAY_SIZE(acpi_isa_irq_penalty))) acpi_isa_irq_penalty[irq] = acpi_irq_get_penalty(irq) + - active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING; + (active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING); } bool acpi_isa_irq_available(int irq) diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c index 7188e53b6b7c..f62c68e24317 100644 --- a/drivers/acpi/pci_slot.c +++ b/drivers/acpi/pci_slot.c @@ -22,8 +22,9 @@ * General Public License for more details. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include -#include #include #include #include @@ -33,30 +34,11 @@ #include #include -static bool debug; static int check_sta_before_sun; -#define DRIVER_VERSION "0.1" -#define DRIVER_AUTHOR "Alex Chiang " -#define DRIVER_DESC "ACPI PCI Slot Detection Driver" -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESC); -MODULE_LICENSE("GPL"); -MODULE_PARM_DESC(debug, "Debugging mode enabled or not"); -module_param(debug, bool, 0644); - #define _COMPONENT ACPI_PCI_COMPONENT ACPI_MODULE_NAME("pci_slot"); -#define MY_NAME "pci_slot" -#define err(format, arg...) pr_err("%s: " format , MY_NAME , ## arg) -#define info(format, arg...) pr_info("%s: " format , MY_NAME , ## arg) -#define dbg(format, arg...) 
\ - do { \ - if (debug) \ - pr_debug("%s: " format, MY_NAME , ## arg); \ - } while (0) - #define SLOT_NAME_SIZE 21 /* Inspired by #define in acpiphp.h */ struct acpi_pci_slot { @@ -76,7 +58,7 @@ check_slot(acpi_handle handle, unsigned long long *sun) struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); - dbg("Checking slot on path: %s\n", (char *)buffer.pointer); + pr_debug("Checking slot on path: %s\n", (char *)buffer.pointer); if (check_sta_before_sun) { /* If SxFy doesn't have _STA, we just assume it's there */ @@ -87,14 +69,16 @@ check_slot(acpi_handle handle, unsigned long long *sun) status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr); if (ACPI_FAILURE(status)) { - dbg("_ADR returned %d on %s\n", status, (char *)buffer.pointer); + pr_debug("_ADR returned %d on %s\n", + status, (char *)buffer.pointer); goto out; } /* No _SUN == not a slot == bail */ status = acpi_evaluate_integer(handle, "_SUN", NULL, sun); if (ACPI_FAILURE(status)) { - dbg("_SUN returned %d on %s\n", status, (char *)buffer.pointer); + pr_debug("_SUN returned %d on %s\n", + status, (char *)buffer.pointer); goto out; } @@ -132,15 +116,13 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) } slot = kmalloc(sizeof(*slot), GFP_KERNEL); - if (!slot) { - err("%s: cannot allocate memory\n", __func__); + if (!slot) return AE_OK; - } snprintf(name, sizeof(name), "%llu", sun); pci_slot = pci_create_slot(pci_bus, device, name, NULL); if (IS_ERR(pci_slot)) { - err("pci_create_slot returned %ld\n", PTR_ERR(pci_slot)); + pr_err("pci_create_slot returned %ld\n", PTR_ERR(pci_slot)); kfree(slot); return AE_OK; } @@ -150,8 +132,8 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) get_device(&pci_bus->dev); - dbg("pci_slot: %p, pci_bus: %x, device: %d, name: %s\n", - pci_slot, pci_bus->number, device, name); + pr_debug("%p, pci_bus: %x, device: %d, name: %s\n", + pci_slot, pci_bus->number, device, name); return AE_OK; } @@ -186,7 +168,8 @@ void acpi_pci_slot_remove(struct pci_bus *bus) static int do_sta_before_sun(const struct dmi_system_id *d) { - info("%s detected: will evaluate _STA before calling _SUN\n", d->ident); + pr_info("%s detected: will evaluate _STA before calling _SUN\n", + d->ident); check_sta_before_sun = 1; return 0; } diff --git a/drivers/acpi/pmic/intel_pmic.c b/drivers/acpi/pmic/intel_pmic.c index bd772cd56494..ca18e0d23df9 100644 --- a/drivers/acpi/pmic/intel_pmic.c +++ b/drivers/acpi/pmic/intel_pmic.c @@ -13,7 +13,7 @@ * GNU General Public License for more details. 
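
The pci_slot.c hunks above drop the driver's home-grown err()/info()/dbg() macros and its `debug` module parameter in favor of the generic pr_*() helpers. With pr_fmt() defined before any include, every message is prefixed automatically, and dynamic debug replaces the old boolean knob for pr_debug() sites. A minimal sketch of the pattern (the example() function and the path string are hypothetical):

	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

	#include <linux/printk.h>

	static void example(void)
	{
		/* emits "pci_slot: Checking slot on path: \_SB.PCI0" once
		 * this call site is enabled via dynamic debug */
		pr_debug("Checking slot on path: %s\n", "\\_SB.PCI0");
	}
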
*/ -#include +#include #include #include #include @@ -21,12 +21,19 @@ #define PMIC_POWER_OPREGION_ID 0x8d #define PMIC_THERMAL_OPREGION_ID 0x8c +#define PMIC_REGS_OPREGION_ID 0x8f + +struct intel_pmic_regs_handler_ctx { + unsigned int val; + u16 addr; +}; struct intel_pmic_opregion { struct mutex lock; struct acpi_lpat_conversion_table *lpat_table; struct regmap *regmap; struct intel_pmic_opregion_data *data; + struct intel_pmic_regs_handler_ctx ctx; }; static int pmic_get_reg_bit(int address, struct pmic_table *table, @@ -131,7 +138,7 @@ static int pmic_thermal_aux(struct intel_pmic_opregion *opregion, int reg, } static int pmic_thermal_pen(struct intel_pmic_opregion *opregion, int reg, - u32 function, u64 *value) + int bit, u32 function, u64 *value) { struct intel_pmic_opregion_data *d = opregion->data; struct regmap *regmap = opregion->regmap; @@ -140,12 +147,12 @@ static int pmic_thermal_pen(struct intel_pmic_opregion *opregion, int reg, return -ENXIO; if (function == ACPI_READ) - return d->get_policy(regmap, reg, value); + return d->get_policy(regmap, reg, bit, value); if (*value != 0 && *value != 1) return -EINVAL; - return d->update_policy(regmap, reg, *value); + return d->update_policy(regmap, reg, bit, *value); } static bool pmic_thermal_is_temp(int address) @@ -170,13 +177,13 @@ static acpi_status intel_pmic_thermal_handler(u32 function, { struct intel_pmic_opregion *opregion = region_context; struct intel_pmic_opregion_data *d = opregion->data; - int reg, result; + int reg, bit, result; if (bits != 32 || !value64) return AE_BAD_PARAMETER; result = pmic_get_reg_bit(address, d->thermal_table, - d->thermal_table_count, ®, NULL); + d->thermal_table_count, ®, &bit); if (result == -ENOENT) return AE_BAD_PARAMETER; @@ -187,7 +194,8 @@ static acpi_status intel_pmic_thermal_handler(u32 function, else if (pmic_thermal_is_aux(address)) result = pmic_thermal_aux(opregion, reg, function, value64); else if (pmic_thermal_is_pen(address)) - result = pmic_thermal_pen(opregion, reg, function, value64); + result = pmic_thermal_pen(opregion, reg, bit, + function, value64); else result = -EINVAL; @@ -203,6 +211,48 @@ static acpi_status intel_pmic_thermal_handler(u32 function, return AE_OK; } +static acpi_status intel_pmic_regs_handler(u32 function, + acpi_physical_address address, u32 bits, u64 *value64, + void *handler_context, void *region_context) +{ + struct intel_pmic_opregion *opregion = region_context; + int result = 0; + + switch (address) { + case 0: + return AE_OK; + case 1: + opregion->ctx.addr |= (*value64 & 0xff) << 8; + return AE_OK; + case 2: + opregion->ctx.addr |= *value64 & 0xff; + return AE_OK; + case 3: + opregion->ctx.val = *value64 & 0xff; + return AE_OK; + case 4: + if (*value64) { + result = regmap_write(opregion->regmap, opregion->ctx.addr, + opregion->ctx.val); + } else { + result = regmap_read(opregion->regmap, opregion->ctx.addr, + &opregion->ctx.val); + if (result == 0) + *value64 = opregion->ctx.val; + } + memset(&opregion->ctx, 0x00, sizeof(opregion->ctx)); + } + + if (result < 0) { + if (result == -EINVAL) + return AE_BAD_PARAMETER; + else + return AE_ERROR; + } + + return AE_OK; +} + int intel_pmic_install_opregion_handler(struct device *dev, acpi_handle handle, struct regmap *regmap, struct intel_pmic_opregion_data *d) @@ -242,16 +292,30 @@ int intel_pmic_install_opregion_handler(struct device *dev, acpi_handle handle, acpi_remove_address_space_handler(handle, PMIC_POWER_OPREGION_ID, intel_pmic_power_handler); ret = -ENODEV; - goto out_error; + goto 
out_remove_power_handler; + } + + status = acpi_install_address_space_handler(handle, + PMIC_REGS_OPREGION_ID, intel_pmic_regs_handler, NULL, + opregion); + if (ACPI_FAILURE(status)) { + ret = -ENODEV; + goto out_remove_thermal_handler; } opregion->data = d; return 0; +out_remove_thermal_handler: + acpi_remove_address_space_handler(handle, PMIC_THERMAL_OPREGION_ID, + intel_pmic_thermal_handler); + +out_remove_power_handler: + acpi_remove_address_space_handler(handle, PMIC_POWER_OPREGION_ID, + intel_pmic_power_handler); + out_error: acpi_lpat_free_conversion_table(opregion->lpat_table); return ret; } EXPORT_SYMBOL_GPL(intel_pmic_install_opregion_handler); - -MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/pmic/intel_pmic.h b/drivers/acpi/pmic/intel_pmic.h index d4e90af8f0dd..e8bfa7b865a5 100644 --- a/drivers/acpi/pmic/intel_pmic.h +++ b/drivers/acpi/pmic/intel_pmic.h @@ -12,8 +12,8 @@ struct intel_pmic_opregion_data { int (*update_power)(struct regmap *r, int reg, int bit, bool on); int (*get_raw_temp)(struct regmap *r, int reg); int (*update_aux)(struct regmap *r, int reg, int raw_temp); - int (*get_policy)(struct regmap *r, int reg, u64 *value); - int (*update_policy)(struct regmap *r, int reg, int enable); + int (*get_policy)(struct regmap *r, int reg, int bit, u64 *value); + int (*update_policy)(struct regmap *r, int reg, int bit, int enable); struct pmic_table *power_table; int power_table_count; struct pmic_table *thermal_table; diff --git a/drivers/acpi/pmic/intel_pmic_bxtwc.c b/drivers/acpi/pmic/intel_pmic_bxtwc.c new file mode 100644 index 000000000000..90011aad4d20 --- /dev/null +++ b/drivers/acpi/pmic/intel_pmic_bxtwc.c @@ -0,0 +1,420 @@ +/* + * intel_pmic_bxtwc.c - Intel BXT WhiskeyCove PMIC operation region driver + * + * Copyright (C) 2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
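
The new PMIC_REGS_OPREGION_ID handler added above behaves as a small mailbox: AML first stores the target register address and the data byte piece by piece, then a store to offset 4 triggers the actual regmap access. A condensed map of the offsets, as read from intel_pmic_regs_handler() (comment-only sketch):

	/*
	 * offset 0 - no-op
	 * offset 1 - register address, high byte (ctx.addr |= v << 8)
	 * offset 2 - register address, low byte  (ctx.addr |= v)
	 * offset 3 - data byte for a write       (ctx.val = v)
	 * offset 4 - trigger: non-zero value => regmap_write(addr, val);
	 *            zero => regmap_read(addr), result returned in *value64;
	 *            the accumulated ctx is cleared after either access
	 */

The reworked error path likewise gains proper unwinding: a failure while installing the regs handler now removes the thermal and power handlers that were already installed, instead of leaking them.
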
+ */ + +#include +#include +#include +#include +#include +#include "intel_pmic.h" + +#define WHISKEY_COVE_ALRT_HIGH_BIT_MASK 0x0F +#define WHISKEY_COVE_ADC_HIGH_BIT(x) (((x & 0x0F) << 8)) +#define WHISKEY_COVE_ADC_CURSRC(x) (((x & 0xF0) >> 4)) +#define VR_MODE_DISABLED 0 +#define VR_MODE_AUTO BIT(0) +#define VR_MODE_NORMAL BIT(1) +#define VR_MODE_SWITCH BIT(2) +#define VR_MODE_ECO (BIT(0)|BIT(1)) +#define VSWITCH2_OUTPUT BIT(5) +#define VSWITCH1_OUTPUT BIT(4) +#define VUSBPHY_CHARGE BIT(1) + +static struct pmic_table power_table[] = { + { + .address = 0x0, + .reg = 0x63, + .bit = VR_MODE_AUTO, + }, /* VDD1 -> VDD1CNT */ + { + .address = 0x04, + .reg = 0x65, + .bit = VR_MODE_AUTO, + }, /* VDD2 -> VDD2CNT */ + { + .address = 0x08, + .reg = 0x67, + .bit = VR_MODE_AUTO, + }, /* VDD3 -> VDD3CNT */ + { + .address = 0x0c, + .reg = 0x6d, + .bit = VR_MODE_AUTO, + }, /* VLFX -> VFLEXCNT */ + { + .address = 0x10, + .reg = 0x6f, + .bit = VR_MODE_NORMAL, + }, /* VP1A -> VPROG1ACNT */ + { + .address = 0x14, + .reg = 0x70, + .bit = VR_MODE_NORMAL, + }, /* VP1B -> VPROG1BCNT */ + { + .address = 0x18, + .reg = 0x71, + .bit = VR_MODE_NORMAL, + }, /* VP1C -> VPROG1CCNT */ + { + .address = 0x1c, + .reg = 0x72, + .bit = VR_MODE_NORMAL, + }, /* VP1D -> VPROG1DCNT */ + { + .address = 0x20, + .reg = 0x73, + .bit = VR_MODE_NORMAL, + }, /* VP2A -> VPROG2ACNT */ + { + .address = 0x24, + .reg = 0x74, + .bit = VR_MODE_NORMAL, + }, /* VP2B -> VPROG2BCNT */ + { + .address = 0x28, + .reg = 0x75, + .bit = VR_MODE_NORMAL, + }, /* VP2C -> VPROG2CCNT */ + { + .address = 0x2c, + .reg = 0x76, + .bit = VR_MODE_NORMAL, + }, /* VP3A -> VPROG3ACNT */ + { + .address = 0x30, + .reg = 0x77, + .bit = VR_MODE_NORMAL, + }, /* VP3B -> VPROG3BCNT */ + { + .address = 0x34, + .reg = 0x78, + .bit = VSWITCH2_OUTPUT, + }, /* VSW2 -> VLD0CNT Bit 5*/ + { + .address = 0x38, + .reg = 0x78, + .bit = VSWITCH1_OUTPUT, + }, /* VSW1 -> VLD0CNT Bit 4 */ + { + .address = 0x3c, + .reg = 0x78, + .bit = VUSBPHY_CHARGE, + }, /* VUPY -> VLDOCNT Bit 1 */ + { + .address = 0x40, + .reg = 0x7b, + .bit = VR_MODE_NORMAL, + }, /* VRSO -> VREFSOCCNT*/ + { + .address = 0x44, + .reg = 0xA0, + .bit = VR_MODE_NORMAL, + }, /* VP1E -> VPROG1ECNT */ + { + .address = 0x48, + .reg = 0xA1, + .bit = VR_MODE_NORMAL, + }, /* VP1F -> VPROG1FCNT */ + { + .address = 0x4c, + .reg = 0xA2, + .bit = VR_MODE_NORMAL, + }, /* VP2D -> VPROG2DCNT */ + { + .address = 0x50, + .reg = 0xA3, + .bit = VR_MODE_NORMAL, + }, /* VP4A -> VPROG4ACNT */ + { + .address = 0x54, + .reg = 0xA4, + .bit = VR_MODE_NORMAL, + }, /* VP4B -> VPROG4BCNT */ + { + .address = 0x58, + .reg = 0xA5, + .bit = VR_MODE_NORMAL, + }, /* VP4C -> VPROG4CCNT */ + { + .address = 0x5c, + .reg = 0xA6, + .bit = VR_MODE_NORMAL, + }, /* VP4D -> VPROG4DCNT */ + { + .address = 0x60, + .reg = 0xA7, + .bit = VR_MODE_NORMAL, + }, /* VP5A -> VPROG5ACNT */ + { + .address = 0x64, + .reg = 0xA8, + .bit = VR_MODE_NORMAL, + }, /* VP5B -> VPROG5BCNT */ + { + .address = 0x68, + .reg = 0xA9, + .bit = VR_MODE_NORMAL, + }, /* VP6A -> VPROG6ACNT */ + { + .address = 0x6c, + .reg = 0xAA, + .bit = VR_MODE_NORMAL, + }, /* VP6B -> VPROG6BCNT */ + { + .address = 0x70, + .reg = 0x36, + .bit = BIT(2), + }, /* SDWN_N -> MODEMCTRL Bit 2 */ + { + .address = 0x74, + .reg = 0x36, + .bit = BIT(0), + } /* MOFF -> MODEMCTRL Bit 0 */ +}; + +static struct pmic_table thermal_table[] = { + { + .address = 0x00, + .reg = 0x4F39 + }, + { + .address = 0x04, + .reg = 0x4F24 + }, + { + .address = 0x08, + .reg = 0x4F26 + }, + { + .address = 0x0c, + .reg = 0x4F3B + }, + { + 
.address = 0x10, + .reg = 0x4F28 + }, + { + .address = 0x14, + .reg = 0x4F2A + }, + { + .address = 0x18, + .reg = 0x4F3D + }, + { + .address = 0x1c, + .reg = 0x4F2C + }, + { + .address = 0x20, + .reg = 0x4F2E + }, + { + .address = 0x24, + .reg = 0x4F3F + }, + { + .address = 0x28, + .reg = 0x4F30 + }, + { + .address = 0x30, + .reg = 0x4F41 + }, + { + .address = 0x34, + .reg = 0x4F32 + }, + { + .address = 0x3c, + .reg = 0x4F43 + }, + { + .address = 0x40, + .reg = 0x4F34 + }, + { + .address = 0x48, + .reg = 0x4F6A, + .bit = 0, + }, + { + .address = 0x4C, + .reg = 0x4F6A, + .bit = 1 + }, + { + .address = 0x50, + .reg = 0x4F6A, + .bit = 2 + }, + { + .address = 0x54, + .reg = 0x4F6A, + .bit = 4 + }, + { + .address = 0x58, + .reg = 0x4F6A, + .bit = 5 + }, + { + .address = 0x5C, + .reg = 0x4F6A, + .bit = 3 + }, +}; + +static int intel_bxtwc_pmic_get_power(struct regmap *regmap, int reg, + int bit, u64 *value) +{ + int data; + + if (regmap_read(regmap, reg, &data)) + return -EIO; + + *value = (data & bit) ? 1 : 0; + return 0; +} + +static int intel_bxtwc_pmic_update_power(struct regmap *regmap, int reg, + int bit, bool on) +{ + u8 val, mask = bit; + + if (on) + val = 0xFF; + else + val = 0x0; + + return regmap_update_bits(regmap, reg, mask, val); +} + +static int intel_bxtwc_pmic_get_raw_temp(struct regmap *regmap, int reg) +{ + unsigned int val, adc_val, reg_val; + u8 temp_l, temp_h, cursrc; + unsigned long rlsb; + static const unsigned long rlsb_array[] = { + 0, 260420, 130210, 65100, 32550, 16280, + 8140, 4070, 2030, 0, 260420, 130210 }; + + if (regmap_read(regmap, reg, &val)) + return -EIO; + temp_l = (u8) val; + + if (regmap_read(regmap, (reg - 1), &val)) + return -EIO; + temp_h = (u8) val; + + reg_val = temp_l | WHISKEY_COVE_ADC_HIGH_BIT(temp_h); + cursrc = WHISKEY_COVE_ADC_CURSRC(temp_h); + rlsb = rlsb_array[cursrc]; + adc_val = reg_val * rlsb / 1000; + + return adc_val; +} + +static int +intel_bxtwc_pmic_update_aux(struct regmap *regmap, int reg, int raw) +{ + u32 bsr_num; + u16 resi_val, count = 0, thrsh = 0; + u8 alrt_h, alrt_l, cursel = 0; + + bsr_num = raw; + bsr_num /= (1 << 5); + + count = fls(bsr_num) - 1; + + cursel = clamp_t(s8, (count - 7), 0, 7); + thrsh = raw / (1 << (4 + cursel)); + + resi_val = (cursel << 9) | thrsh; + alrt_h = (resi_val >> 8) & WHISKEY_COVE_ALRT_HIGH_BIT_MASK; + if (regmap_update_bits(regmap, + reg - 1, + WHISKEY_COVE_ALRT_HIGH_BIT_MASK, + alrt_h)) + return -EIO; + + alrt_l = (u8)resi_val; + return regmap_write(regmap, reg, alrt_l); +} + +static int +intel_bxtwc_pmic_get_policy(struct regmap *regmap, int reg, int bit, u64 *value) +{ + u8 mask = BIT(bit); + unsigned int val; + + if (regmap_read(regmap, reg, &val)) + return -EIO; + + *value = (val & mask) >> bit; + return 0; +} + +static int +intel_bxtwc_pmic_update_policy(struct regmap *regmap, + int reg, int bit, int enable) +{ + u8 mask = BIT(bit), val = enable << bit; + + return regmap_update_bits(regmap, reg, mask, val); +} + +static struct intel_pmic_opregion_data intel_bxtwc_pmic_opregion_data = { + .get_power = intel_bxtwc_pmic_get_power, + .update_power = intel_bxtwc_pmic_update_power, + .get_raw_temp = intel_bxtwc_pmic_get_raw_temp, + .update_aux = intel_bxtwc_pmic_update_aux, + .get_policy = intel_bxtwc_pmic_get_policy, + .update_policy = intel_bxtwc_pmic_update_policy, + .power_table = power_table, + .power_table_count = ARRAY_SIZE(power_table), + .thermal_table = thermal_table, + .thermal_table_count = ARRAY_SIZE(thermal_table), +}; + +static int intel_bxtwc_pmic_opregion_probe(struct 
platform_device *pdev) +{ + struct intel_soc_pmic *pmic = dev_get_drvdata(pdev->dev.parent); + + return intel_pmic_install_opregion_handler(&pdev->dev, + ACPI_HANDLE(pdev->dev.parent), + pmic->regmap, + &intel_bxtwc_pmic_opregion_data); +} + +static struct platform_device_id bxt_wc_opregion_id_table[] = { + { .name = "bxt_wcove_region" }, + {}, +}; + +static struct platform_driver intel_bxtwc_pmic_opregion_driver = { + .probe = intel_bxtwc_pmic_opregion_probe, + .driver = { + .name = "bxt_whiskey_cove_pmic", + }, + .id_table = bxt_wc_opregion_id_table, +}; + +static int __init intel_bxtwc_pmic_opregion_driver_init(void) +{ + return platform_driver_register(&intel_bxtwc_pmic_opregion_driver); +} +device_initcall(intel_bxtwc_pmic_opregion_driver_init); diff --git a/drivers/acpi/pmic/intel_pmic_crc.c b/drivers/acpi/pmic/intel_pmic_crc.c index fcd1852dcdee..d7f1761ab1bc 100644 --- a/drivers/acpi/pmic/intel_pmic_crc.c +++ b/drivers/acpi/pmic/intel_pmic_crc.c @@ -141,7 +141,8 @@ static int intel_crc_pmic_update_aux(struct regmap *regmap, int reg, int raw) regmap_update_bits(regmap, reg - 1, 0x3, raw >> 8) ? -EIO : 0; } -static int intel_crc_pmic_get_policy(struct regmap *regmap, int reg, u64 *value) +static int intel_crc_pmic_get_policy(struct regmap *regmap, + int reg, int bit, u64 *value) { int pen; @@ -152,7 +153,7 @@ static int intel_crc_pmic_get_policy(struct regmap *regmap, int reg, u64 *value) } static int intel_crc_pmic_update_policy(struct regmap *regmap, - int reg, int enable) + int reg, int bit, int enable) { int alert0; diff --git a/drivers/acpi/pmic/intel_pmic_xpower.c b/drivers/acpi/pmic/intel_pmic_xpower.c index 6a082d4de12c..e6e991ac20f3 100644 --- a/drivers/acpi/pmic/intel_pmic_xpower.c +++ b/drivers/acpi/pmic/intel_pmic_xpower.c @@ -13,7 +13,7 @@ * GNU General Public License for more details. 
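
Both PMIC operation-region drivers in this series shed their MODULE_* boilerplate: the code is only ever built in, so module_init() gives way to device_initcall(), as the intel_pmic_xpower.c hunk just below shows. The resulting pattern, with a hypothetical foo_driver:

	static int __init foo_opregion_driver_init(void)
	{
		return platform_driver_register(&foo_driver);
	}
	device_initcall(foo_opregion_driver_init);
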
*/ -#include +#include #include #include #include @@ -262,7 +262,4 @@ static int __init intel_xpower_pmic_opregion_driver_init(void) { return platform_driver_register(&intel_xpower_pmic_opregion_driver); } -module_init(intel_xpower_pmic_opregion_driver_init); - -MODULE_DESCRIPTION("XPower AXP288 ACPI operation region driver"); -MODULE_LICENSE("GPL"); +device_initcall(intel_xpower_pmic_opregion_driver_init); diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 33a38d604630..9125d7d96372 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -108,13 +108,12 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry, return -EINVAL; } -static phys_cpuid_t map_madt_entry(int type, u32 acpi_id) +static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt, + int type, u32 acpi_id) { unsigned long madt_end, entry; phys_cpuid_t phys_id = PHYS_CPUID_INVALID; /* CPU hardware ID */ - struct acpi_table_madt *madt; - madt = get_madt_table(); if (!madt) return phys_id; @@ -145,6 +144,25 @@ static phys_cpuid_t map_madt_entry(int type, u32 acpi_id) return phys_id; } +phys_cpuid_t __init acpi_map_madt_entry(u32 acpi_id) +{ + struct acpi_table_madt *madt = NULL; + acpi_size tbl_size; + phys_cpuid_t rv; + + acpi_get_table_with_size(ACPI_SIG_MADT, 0, + (struct acpi_table_header **)&madt, + &tbl_size); + if (!madt) + return PHYS_CPUID_INVALID; + + rv = map_madt_entry(madt, 1, acpi_id); + + early_acpi_os_unmap_memory(madt, tbl_size); + + return rv; +} + static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -185,7 +203,7 @@ phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id) phys_id = map_mat_entry(handle, type, acpi_id); if (invalid_phys_cpuid(phys_id)) - phys_id = map_madt_entry(type, acpi_id); + phys_id = map_madt_entry(get_madt_table(), type, acpi_id); return phys_id; } diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index d2fa8cb82d2b..0ca14ac7bb28 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -90,7 +90,7 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data) pr->performance_platform_limit); break; case ACPI_PROCESSOR_NOTIFY_POWER: - acpi_processor_cst_has_changed(pr); + acpi_processor_power_state_has_changed(pr); acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, 0); break; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 444e3745c8b3..cea52528aa18 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -59,6 +59,12 @@ module_param(latency_factor, uint, 0644); static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device); +struct cpuidle_driver acpi_idle_driver = { + .name = "acpi_idle", + .owner = THIS_MODULE, +}; + +#ifdef CONFIG_ACPI_PROCESSOR_CSTATE static DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate); @@ -297,7 +303,6 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *cst; - if (nocst) return -ENODEV; @@ -570,7 +575,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr) return (working); } -static int acpi_processor_get_power_info(struct acpi_processor *pr) +static int acpi_processor_get_cstate_info(struct acpi_processor *pr) { unsigned int i; int result; @@ -804,36 +809,12 @@ static void 
acpi_idle_enter_freeze(struct cpuidle_device *dev, acpi_idle_do_entry(cx); } -struct cpuidle_driver acpi_idle_driver = { - .name = "acpi_idle", - .owner = THIS_MODULE, -}; - -/** - * acpi_processor_setup_cpuidle_cx - prepares and configures CPUIDLE - * device i.e. per-cpu data - * - * @pr: the ACPI processor - * @dev : the cpuidle device - */ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, struct cpuidle_device *dev) { int i, count = CPUIDLE_DRIVER_STATE_START; struct acpi_processor_cx *cx; - if (!pr->flags.power_setup_done) - return -EINVAL; - - if (pr->flags.power == 0) { - return -EINVAL; - } - - if (!dev) - return -EINVAL; - - dev->cpu = pr->id; - if (max_cstate == 0) max_cstate = 1; @@ -856,31 +837,13 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, return 0; } -/** - * acpi_processor_setup_cpuidle states- prepares and configures cpuidle - * global state data i.e. idle routines - * - * @pr: the ACPI processor - */ -static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) +static int acpi_processor_setup_cstates(struct acpi_processor *pr) { int i, count = CPUIDLE_DRIVER_STATE_START; struct acpi_processor_cx *cx; struct cpuidle_state *state; struct cpuidle_driver *drv = &acpi_idle_driver; - if (!pr->flags.power_setup_done) - return -EINVAL; - - if (pr->flags.power == 0) - return -EINVAL; - - drv->safe_state_index = -1; - for (i = CPUIDLE_DRIVER_STATE_START; i < CPUIDLE_STATE_MAX; i++) { - drv->states[i].name[0] = '\0'; - drv->states[i].desc[0] = '\0'; - } - if (max_cstate == 0) max_cstate = 1; @@ -892,7 +855,7 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) state = &drv->states[count]; snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i); - strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); + strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); state->exit_latency = cx->latency; state->target_residency = cx->latency * latency_factor; state->enter = acpi_idle_enter; @@ -925,6 +888,450 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) return 0; } +static inline void acpi_processor_cstate_first_run_checks(void) +{ + acpi_status status; + static int first_run; + + if (first_run) + return; + dmi_check_system(processor_power_dmi_table); + max_cstate = acpi_processor_cstate_check(max_cstate); + if (max_cstate < ACPI_C_STATES_MAX) + pr_notice("ACPI: processor limited to max C-state %d\n", + max_cstate); + first_run++; + + if (acpi_gbl_FADT.cst_control && !nocst) { + status = acpi_os_write_port(acpi_gbl_FADT.smi_command, + acpi_gbl_FADT.cst_control, 8); + if (ACPI_FAILURE(status)) + ACPI_EXCEPTION((AE_INFO, status, + "Notifying BIOS of _CST ability failed")); + } +} +#else + +static inline int disabled_by_idle_boot_param(void) { return 0; } +static inline void acpi_processor_cstate_first_run_checks(void) { } +static int acpi_processor_get_cstate_info(struct acpi_processor *pr) +{ + return -ENODEV; +} + +static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, + struct cpuidle_device *dev) +{ + return -EINVAL; +} + +static int acpi_processor_setup_cstates(struct acpi_processor *pr) +{ + return -EINVAL; +} + +#endif /* CONFIG_ACPI_PROCESSOR_CSTATE */ + +struct acpi_lpi_states_array { + unsigned int size; + unsigned int composite_states_size; + struct acpi_lpi_state *entries; + struct acpi_lpi_state *composite_states[ACPI_PROCESSOR_MAX_POWER]; +}; + +static int obj_get_integer(union acpi_object *obj, u32 *value) +{ + if (obj->type != ACPI_TYPE_INTEGER) + return -EINVAL; + + 
*value = obj->integer.value; + return 0; +} + +static int acpi_processor_evaluate_lpi(acpi_handle handle, + struct acpi_lpi_states_array *info) +{ + acpi_status status; + int ret = 0; + int pkg_count, state_idx = 1, loop; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object *lpi_data; + struct acpi_lpi_state *lpi_state; + + status = acpi_evaluate_object(handle, "_LPI", NULL, &buffer); + if (ACPI_FAILURE(status)) { + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _LPI, giving up\n")); + return -ENODEV; + } + + lpi_data = buffer.pointer; + + /* There must be at least 4 elements = 3 elements + 1 package */ + if (!lpi_data || lpi_data->type != ACPI_TYPE_PACKAGE || + lpi_data->package.count < 4) { + pr_debug("not enough elements in _LPI\n"); + ret = -ENODATA; + goto end; + } + + pkg_count = lpi_data->package.elements[2].integer.value; + + /* Validate number of power states. */ + if (pkg_count < 1 || pkg_count != lpi_data->package.count - 3) { + pr_debug("count given by _LPI is not valid\n"); + ret = -ENODATA; + goto end; + } + + lpi_state = kcalloc(pkg_count, sizeof(*lpi_state), GFP_KERNEL); + if (!lpi_state) { + ret = -ENOMEM; + goto end; + } + + info->size = pkg_count; + info->entries = lpi_state; + + /* LPI States start at index 3 */ + for (loop = 3; state_idx <= pkg_count; loop++, state_idx++, lpi_state++) { + union acpi_object *element, *pkg_elem, *obj; + + element = &lpi_data->package.elements[loop]; + if (element->type != ACPI_TYPE_PACKAGE || element->package.count < 7) + continue; + + pkg_elem = element->package.elements; + + obj = pkg_elem + 6; + if (obj->type == ACPI_TYPE_BUFFER) { + struct acpi_power_register *reg; + + reg = (struct acpi_power_register *)obj->buffer.pointer; + if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO && + reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) + continue; + + lpi_state->address = reg->address; + lpi_state->entry_method = + reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE ? + ACPI_CSTATE_FFH : ACPI_CSTATE_SYSTEMIO; + } else if (obj->type == ACPI_TYPE_INTEGER) { + lpi_state->entry_method = ACPI_CSTATE_INTEGER; + lpi_state->address = obj->integer.value; + } else { + continue; + } + + /* elements[7,8] skipped for now i.e. Residency/Usage counter*/ + + obj = pkg_elem + 9; + if (obj->type == ACPI_TYPE_STRING) + strlcpy(lpi_state->desc, obj->string.pointer, + ACPI_CX_DESC_LEN); + + lpi_state->index = state_idx; + if (obj_get_integer(pkg_elem + 0, &lpi_state->min_residency)) { + pr_debug("No min. 
residency found, assuming 10 us\n"); + lpi_state->min_residency = 10; + } + + if (obj_get_integer(pkg_elem + 1, &lpi_state->wake_latency)) { + pr_debug("No wakeup residency found, assuming 10 us\n"); + lpi_state->wake_latency = 10; + } + + if (obj_get_integer(pkg_elem + 2, &lpi_state->flags)) + lpi_state->flags = 0; + + if (obj_get_integer(pkg_elem + 3, &lpi_state->arch_flags)) + lpi_state->arch_flags = 0; + + if (obj_get_integer(pkg_elem + 4, &lpi_state->res_cnt_freq)) + lpi_state->res_cnt_freq = 1; + + if (obj_get_integer(pkg_elem + 5, &lpi_state->enable_parent_state)) + lpi_state->enable_parent_state = 0; + } + + acpi_handle_debug(handle, "Found %d power states\n", state_idx); +end: + kfree(buffer.pointer); + return ret; +} + +/* + * flat_state_cnt - the number of composite LPI states after the process of flattening + */ +static int flat_state_cnt; + +/** + * combine_lpi_states - combine local and parent LPI states to form a composite LPI state + * + * @local: local LPI state + * @parent: parent LPI state + * @result: composite LPI state + */ +static bool combine_lpi_states(struct acpi_lpi_state *local, + struct acpi_lpi_state *parent, + struct acpi_lpi_state *result) +{ + if (parent->entry_method == ACPI_CSTATE_INTEGER) { + if (!parent->address) /* 0 means autopromotable */ + return false; + result->address = local->address + parent->address; + } else { + result->address = parent->address; + } + + result->min_residency = max(local->min_residency, parent->min_residency); + result->wake_latency = local->wake_latency + parent->wake_latency; + result->enable_parent_state = parent->enable_parent_state; + result->entry_method = local->entry_method; + + result->flags = parent->flags; + result->arch_flags = parent->arch_flags; + result->index = parent->index; + + strlcpy(result->desc, local->desc, ACPI_CX_DESC_LEN); + strlcat(result->desc, "+", ACPI_CX_DESC_LEN); + strlcat(result->desc, parent->desc, ACPI_CX_DESC_LEN); + return true; +} + +#define ACPI_LPI_STATE_FLAGS_ENABLED BIT(0) + +static void stash_composite_state(struct acpi_lpi_states_array *curr_level, + struct acpi_lpi_state *t) +{ + curr_level->composite_states[curr_level->composite_states_size++] = t; +} + +static int flatten_lpi_states(struct acpi_processor *pr, + struct acpi_lpi_states_array *curr_level, + struct acpi_lpi_states_array *prev_level) +{ + int i, j, state_count = curr_level->size; + struct acpi_lpi_state *p, *t = curr_level->entries; + + curr_level->composite_states_size = 0; + for (j = 0; j < state_count; j++, t++) { + struct acpi_lpi_state *flpi; + + if (!(t->flags & ACPI_LPI_STATE_FLAGS_ENABLED)) + continue; + + if (flat_state_cnt >= ACPI_PROCESSOR_MAX_POWER) { + pr_warn("Limiting number of LPI states to max (%d)\n", + ACPI_PROCESSOR_MAX_POWER); + pr_warn("Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n"); + break; + } + + flpi = &pr->power.lpi_states[flat_state_cnt]; + + if (!prev_level) { /* leaf/processor node */ + memcpy(flpi, t, sizeof(*t)); + stash_composite_state(curr_level, flpi); + flat_state_cnt++; + continue; + } + + for (i = 0; i < prev_level->composite_states_size; i++) { + p = prev_level->composite_states[i]; + if (t->index <= p->enable_parent_state && + combine_lpi_states(p, t, flpi)) { + stash_composite_state(curr_level, flpi); + flat_state_cnt++; + flpi++; + } + } + } + + kfree(curr_level->entries); + return 0; +} + +static int acpi_processor_get_lpi_info(struct acpi_processor *pr) +{ + int ret, i; + acpi_status status; + acpi_handle handle = pr->handle, pr_ahandle; + struct acpi_device *d = 
NULL; + struct acpi_lpi_states_array info[2], *tmp, *prev, *curr; + + if (!osc_pc_lpi_support_confirmed) + return -EOPNOTSUPP; + + if (!acpi_has_method(handle, "_LPI")) + return -EINVAL; + + flat_state_cnt = 0; + prev = &info[0]; + curr = &info[1]; + handle = pr->handle; + ret = acpi_processor_evaluate_lpi(handle, prev); + if (ret) + return ret; + flatten_lpi_states(pr, prev, NULL); + + status = acpi_get_parent(handle, &pr_ahandle); + while (ACPI_SUCCESS(status)) { + acpi_bus_get_device(pr_ahandle, &d); + handle = pr_ahandle; + + if (strcmp(acpi_device_hid(d), ACPI_PROCESSOR_CONTAINER_HID)) + break; + + /* can be optional ? */ + if (!acpi_has_method(handle, "_LPI")) + break; + + ret = acpi_processor_evaluate_lpi(handle, curr); + if (ret) + break; + + /* flatten all the LPI states in this level of hierarchy */ + flatten_lpi_states(pr, curr, prev); + + tmp = prev, prev = curr, curr = tmp; + + status = acpi_get_parent(handle, &pr_ahandle); + } + + pr->power.count = flat_state_cnt; + /* reset the index after flattening */ + for (i = 0; i < pr->power.count; i++) + pr->power.lpi_states[i].index = i; + + /* Tell driver that _LPI is supported. */ + pr->flags.has_lpi = 1; + pr->flags.power = 1; + + return 0; +} + +int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu) +{ + return -ENODEV; +} + +int __weak acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) +{ + return -ENODEV; +} + +/** + * acpi_idle_lpi_enter - enters an ACPI any LPI state + * @dev: the target CPU + * @drv: cpuidle driver containing cpuidle state info + * @index: index of target state + * + * Return: 0 for success or negative value for error + */ +static int acpi_idle_lpi_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + struct acpi_processor *pr; + struct acpi_lpi_state *lpi; + + pr = __this_cpu_read(processors); + + if (unlikely(!pr)) + return -EINVAL; + + lpi = &pr->power.lpi_states[index]; + if (lpi->entry_method == ACPI_CSTATE_FFH) + return acpi_processor_ffh_lpi_enter(lpi); + + return -EINVAL; +} + +static int acpi_processor_setup_lpi_states(struct acpi_processor *pr) +{ + int i; + struct acpi_lpi_state *lpi; + struct cpuidle_state *state; + struct cpuidle_driver *drv = &acpi_idle_driver; + + if (!pr->flags.has_lpi) + return -EOPNOTSUPP; + + for (i = 0; i < pr->power.count && i < CPUIDLE_STATE_MAX; i++) { + lpi = &pr->power.lpi_states[i]; + + state = &drv->states[i]; + snprintf(state->name, CPUIDLE_NAME_LEN, "LPI-%d", i); + strlcpy(state->desc, lpi->desc, CPUIDLE_DESC_LEN); + state->exit_latency = lpi->wake_latency; + state->target_residency = lpi->min_residency; + if (lpi->arch_flags) + state->flags |= CPUIDLE_FLAG_TIMER_STOP; + state->enter = acpi_idle_lpi_enter; + drv->safe_state_index = i; + } + + drv->state_count = i; + + return 0; +} + +/** + * acpi_processor_setup_cpuidle_states- prepares and configures cpuidle + * global state data i.e. idle routines + * + * @pr: the ACPI processor + */ +static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) +{ + int i; + struct cpuidle_driver *drv = &acpi_idle_driver; + + if (!pr->flags.power_setup_done || !pr->flags.power) + return -EINVAL; + + drv->safe_state_index = -1; + for (i = CPUIDLE_DRIVER_STATE_START; i < CPUIDLE_STATE_MAX; i++) { + drv->states[i].name[0] = '\0'; + drv->states[i].desc[0] = '\0'; + } + + if (pr->flags.has_lpi) + return acpi_processor_setup_lpi_states(pr); + + return acpi_processor_setup_cstates(pr); +} + +/** + * acpi_processor_setup_cpuidle_dev - prepares and configures CPUIDLE + * device i.e. 
per-cpu data + * + * @pr: the ACPI processor + * @dev : the cpuidle device + */ +static int acpi_processor_setup_cpuidle_dev(struct acpi_processor *pr, + struct cpuidle_device *dev) +{ + if (!pr->flags.power_setup_done || !pr->flags.power || !dev) + return -EINVAL; + + dev->cpu = pr->id; + if (pr->flags.has_lpi) + return acpi_processor_ffh_lpi_probe(pr->id); + + return acpi_processor_setup_cpuidle_cx(pr, dev); +} + +static int acpi_processor_get_power_info(struct acpi_processor *pr) +{ + int ret; + + ret = acpi_processor_get_lpi_info(pr); + if (ret) + ret = acpi_processor_get_cstate_info(pr); + + return ret; +} + int acpi_processor_hotplug(struct acpi_processor *pr) { int ret = 0; @@ -933,18 +1340,15 @@ int acpi_processor_hotplug(struct acpi_processor *pr) if (disabled_by_idle_boot_param()) return 0; - if (nocst) - return -ENODEV; - if (!pr->flags.power_setup_done) return -ENODEV; dev = per_cpu(acpi_cpuidle_device, pr->id); cpuidle_pause_and_lock(); cpuidle_disable_device(dev); - acpi_processor_get_power_info(pr); - if (pr->flags.power) { - acpi_processor_setup_cpuidle_cx(pr, dev); + ret = acpi_processor_get_power_info(pr); + if (!ret && pr->flags.power) { + acpi_processor_setup_cpuidle_dev(pr, dev); ret = cpuidle_enable_device(dev); } cpuidle_resume_and_unlock(); @@ -952,7 +1356,7 @@ int acpi_processor_hotplug(struct acpi_processor *pr) return ret; } -int acpi_processor_cst_has_changed(struct acpi_processor *pr) +int acpi_processor_power_state_has_changed(struct acpi_processor *pr) { int cpu; struct acpi_processor *_pr; @@ -961,9 +1365,6 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) if (disabled_by_idle_boot_param()) return 0; - if (nocst) - return -ENODEV; - if (!pr->flags.power_setup_done) return -ENODEV; @@ -1000,7 +1401,7 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) acpi_processor_get_power_info(_pr); if (_pr->flags.power) { dev = per_cpu(acpi_cpuidle_device, cpu); - acpi_processor_setup_cpuidle_cx(_pr, dev); + acpi_processor_setup_cpuidle_dev(_pr, dev); cpuidle_enable_device(dev); } } @@ -1015,35 +1416,16 @@ static int acpi_processor_registered; int acpi_processor_power_init(struct acpi_processor *pr) { - acpi_status status; int retval; struct cpuidle_device *dev; - static int first_run; if (disabled_by_idle_boot_param()) return 0; - if (!first_run) { - dmi_check_system(processor_power_dmi_table); - max_cstate = acpi_processor_cstate_check(max_cstate); - if (max_cstate < ACPI_C_STATES_MAX) - printk(KERN_NOTICE - "ACPI: processor limited to max C-state %d\n", - max_cstate); - first_run++; - } - - if (acpi_gbl_FADT.cst_control && !nocst) { - status = - acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8); - if (ACPI_FAILURE(status)) { - ACPI_EXCEPTION((AE_INFO, status, - "Notifying BIOS of _CST ability failed")); - } - } + acpi_processor_cstate_first_run_checks(); - acpi_processor_get_power_info(pr); - pr->flags.power_setup_done = 1; + if (!acpi_processor_get_power_info(pr)) + pr->flags.power_setup_done = 1; /* * Install the idle handler if processor power management is supported. @@ -1066,7 +1448,7 @@ int acpi_processor_power_init(struct acpi_processor *pr) return -ENOMEM; per_cpu(acpi_cpuidle_device, pr->id) = dev; - acpi_processor_setup_cpuidle_cx(pr, dev); + acpi_processor_setup_cpuidle_dev(pr, dev); /* Register per-cpu cpuidle_device. 
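
To make the LPI flattening above concrete: combine_lpi_states() keeps the larger of the two minimum residencies and sums the wakeup latencies when folding a leaf state into its parent. A worked example with hypothetical numbers:

	struct acpi_lpi_state local  = { .min_residency = 50,  .wake_latency = 10 };
	struct acpi_lpi_state parent = { .min_residency = 100, .wake_latency = 20 };
	struct acpi_lpi_state out;

	if (combine_lpi_states(&local, &parent, &out)) {
		/* out.min_residency == 100 (max of the two)
		 * out.wake_latency  == 30  (sum of the two)
		 * out.desc          == "<local desc>+<parent desc>" */
	}
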
Cpuidle driver * must already be registered before registering device diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 5f28cf778349..ad9fc84a8601 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -46,6 +46,13 @@ DEFINE_MUTEX(acpi_device_lock); LIST_HEAD(acpi_wakeup_device_list); static DEFINE_MUTEX(acpi_hp_context_lock); +/* + * The UART device described by the SPCR table is the only object which needs + * special-casing. Everything else is covered by ACPI namespace paths in STAO + * table. + */ +static u64 spcr_uart_addr; + struct acpi_dep_data { struct list_head node; acpi_handle master; @@ -494,6 +501,8 @@ static void acpi_device_del(struct acpi_device *device) device_del(&device->dev); } +static BLOCKING_NOTIFIER_HEAD(acpi_reconfig_chain); + static LIST_HEAD(acpi_device_del_list); static DEFINE_MUTEX(acpi_device_del_lock); @@ -514,6 +523,9 @@ static void acpi_device_del_work_fn(struct work_struct *work_not_used) mutex_unlock(&acpi_device_del_lock); + blocking_notifier_call_chain(&acpi_reconfig_chain, + ACPI_RECONFIG_DEVICE_REMOVE, adev); + acpi_device_del(adev); /* * Drop references to all power resources that might have been @@ -1406,7 +1418,7 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle, acpi_bus_get_flags(device); device->flags.match_driver = false; device->flags.initialized = true; - device->flags.visited = false; + acpi_device_clear_enumerated(device); device_initialize(&device->dev); dev_set_uevent_suppress(&device->dev, true); acpi_init_coherency(device); @@ -1453,6 +1465,41 @@ static int acpi_add_single_object(struct acpi_device **child, return 0; } +static acpi_status acpi_get_resource_memory(struct acpi_resource *ares, + void *context) +{ + struct resource *res = context; + + if (acpi_dev_resource_memory(ares, res)) + return AE_CTRL_TERMINATE; + + return AE_OK; +} + +static bool acpi_device_should_be_hidden(acpi_handle handle) +{ + acpi_status status; + struct resource res; + + /* Check if it should ignore the UART device */ + if (!(spcr_uart_addr && acpi_has_method(handle, METHOD_NAME__CRS))) + return false; + + /* + * The UART device described in SPCR table is assumed to have only one + * memory resource present. So we only look for the first one here. + */ + status = acpi_walk_resources(handle, METHOD_NAME__CRS, + acpi_get_resource_memory, &res); + if (ACPI_FAILURE(status) || res.start != spcr_uart_addr) + return false; + + acpi_handle_info(handle, "The UART device @%pa in SPCR table will be hidden\n", + &res.start); + + return true; +} + static int acpi_bus_type_and_status(acpi_handle handle, int *type, unsigned long long *sta) { @@ -1466,6 +1513,9 @@ static int acpi_bus_type_and_status(acpi_handle handle, int *type, switch (acpi_type) { case ACPI_TYPE_ANY: /* for ACPI_ROOT_OBJECT */ case ACPI_TYPE_DEVICE: + if (acpi_device_should_be_hidden(handle)) + return -ENODEV; + *type = ACPI_BUS_TYPE_DEVICE; status = acpi_bus_get_status_handle(handle, sta); if (ACPI_FAILURE(status)) @@ -1676,15 +1726,20 @@ static void acpi_default_enumeration(struct acpi_device *device) bool is_spi_i2c_slave = false; /* - * Do not enemerate SPI/I2C slaves as they will be enuerated by their + * Do not enumerate SPI/I2C slaves as they will be enumerated by their * respective parents. 
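
acpi_device_set_enumerated() and acpi_device_clear_enumerated(), used throughout the scan.c hunks here, are not defined in this diff; they are presumably introduced elsewhere in the series as thin wrappers over the old flags.visited bit, roughly:

	static inline void acpi_device_set_enumerated(struct acpi_device *adev)
	{
		adev->flags.visited = true;
	}

	static inline void acpi_device_clear_enumerated(struct acpi_device *adev)
	{
		adev->flags.visited = false;
	}
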
*/ INIT_LIST_HEAD(&resource_list); acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave, &is_spi_i2c_slave); acpi_dev_free_resource_list(&resource_list); - if (!is_spi_i2c_slave) + if (!is_spi_i2c_slave) { acpi_create_platform_device(device); + acpi_device_set_enumerated(device); + } else { + blocking_notifier_call_chain(&acpi_reconfig_chain, + ACPI_RECONFIG_DEVICE_ADD, device); + } } static const struct acpi_device_id generic_device_ids[] = { @@ -1751,7 +1806,7 @@ static void acpi_bus_attach(struct acpi_device *device) acpi_bus_get_status(device); /* Skip devices that are not present. */ if (!acpi_device_is_present(device)) { - device->flags.visited = false; + acpi_device_clear_enumerated(device); device->flags.power_manageable = 0; return; } @@ -1766,7 +1821,7 @@ static void acpi_bus_attach(struct acpi_device *device) device->flags.initialized = true; } - device->flags.visited = false; + ret = acpi_scan_attach_handler(device); if (ret < 0) return; @@ -1780,7 +1835,6 @@ static void acpi_bus_attach(struct acpi_device *device) if (!ret && device->pnp.type.platform_id) acpi_default_enumeration(device); } - device->flags.visited = true; ok: list_for_each_entry(child, &device->children, node) @@ -1872,7 +1926,7 @@ void acpi_bus_trim(struct acpi_device *adev) */ acpi_device_set_power(adev, ACPI_STATE_D3_COLD); adev->flags.initialized = false; - adev->flags.visited = false; + acpi_device_clear_enumerated(adev); } EXPORT_SYMBOL_GPL(acpi_bus_trim); @@ -1916,9 +1970,26 @@ static int acpi_bus_scan_fixed(void) return result < 0 ? result : 0; } +static void __init acpi_get_spcr_uart_addr(void) +{ + acpi_status status; + struct acpi_table_spcr *spcr_ptr; + + status = acpi_get_table(ACPI_SIG_SPCR, 0, + (struct acpi_table_header **)&spcr_ptr); + if (ACPI_SUCCESS(status)) + spcr_uart_addr = spcr_ptr->serial_port.address; + else + printk(KERN_WARNING PREFIX "STAO table present, but SPCR is missing\n"); +} + +static bool acpi_scan_initialized; + int __init acpi_scan_init(void) { int result; + acpi_status status; + struct acpi_table_stao *stao_ptr; acpi_pci_root_init(); acpi_pci_link_init(); @@ -1934,6 +2005,20 @@ int __init acpi_scan_init(void) acpi_scan_add_handler(&generic_device_handler); + /* + * If there is STAO table, check whether it needs to ignore the UART + * device in SPCR table. + */ + status = acpi_get_table(ACPI_SIG_STAO, 0, + (struct acpi_table_header **)&stao_ptr); + if (ACPI_SUCCESS(status)) { + if (stao_ptr->header.length > sizeof(struct acpi_table_stao)) + printk(KERN_INFO PREFIX "STAO Name List not yet supported."); + + if (stao_ptr->ignore_uart) + acpi_get_spcr_uart_addr(); + } + mutex_lock(&acpi_scan_lock); /* * Enumerate devices in the ACPI namespace. 
@@ -1960,6 +2045,8 @@ int __init acpi_scan_init(void) acpi_update_all_gpes(); + acpi_scan_initialized = true; + out: mutex_unlock(&acpi_scan_lock); return result; @@ -2003,3 +2090,57 @@ int __init __acpi_probe_device_table(struct acpi_probe_entry *ap_head, int nr) return count; } + +struct acpi_table_events_work { + struct work_struct work; + void *table; + u32 event; +}; + +static void acpi_table_events_fn(struct work_struct *work) +{ + struct acpi_table_events_work *tew; + + tew = container_of(work, struct acpi_table_events_work, work); + + if (tew->event == ACPI_TABLE_EVENT_LOAD) { + acpi_scan_lock_acquire(); + acpi_bus_scan(ACPI_ROOT_OBJECT); + acpi_scan_lock_release(); + } + + kfree(tew); +} + +void acpi_scan_table_handler(u32 event, void *table, void *context) +{ + struct acpi_table_events_work *tew; + + if (!acpi_scan_initialized) + return; + + if (event != ACPI_TABLE_EVENT_LOAD) + return; + + tew = kmalloc(sizeof(*tew), GFP_KERNEL); + if (!tew) + return; + + INIT_WORK(&tew->work, acpi_table_events_fn); + tew->table = table; + tew->event = event; + + schedule_work(&tew->work); +} + +int acpi_reconfig_notifier_register(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&acpi_reconfig_chain, nb); +} +EXPORT_SYMBOL(acpi_reconfig_notifier_register); + +int acpi_reconfig_notifier_unregister(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&acpi_reconfig_chain, nb); +} +EXPORT_SYMBOL(acpi_reconfig_notifier_unregister); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 7a2e4d45b266..2b38c1bb0446 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -47,15 +47,32 @@ static void acpi_sleep_tts_switch(u32 acpi_state) } } -static int tts_notify_reboot(struct notifier_block *this, +static void acpi_sleep_pts_switch(u32 acpi_state) +{ + acpi_status status; + + status = acpi_execute_simple_method(NULL, "\\_PTS", acpi_state); + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { + /* + * OS can't evaluate the _PTS object correctly. Some warning + * message will be printed. But it won't break anything. + */ + printk(KERN_NOTICE "Failure in evaluating _PTS object\n"); + } +} + +static int sleep_notify_reboot(struct notifier_block *this, unsigned long code, void *x) { acpi_sleep_tts_switch(ACPI_STATE_S5); + + acpi_sleep_pts_switch(ACPI_STATE_S5); + return NOTIFY_DONE; } -static struct notifier_block tts_notifier = { - .notifier_call = tts_notify_reboot, +static struct notifier_block sleep_notifier = { + .notifier_call = sleep_notify_reboot, .next = NULL, .priority = 0, }; @@ -899,9 +916,9 @@ int __init acpi_sleep_init(void) pr_info(PREFIX "(supports%s)\n", supported); /* - * Register the tts_notifier to reboot notifier list so that the _TTS - * object can also be evaluated when the system enters S5. + * Register the sleep_notifier to reboot notifier list so that the _TTS + * and _PTS object can also be evaluated when the system enters S5. 
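
The acpi_reconfig notifier chain exported above lets other subsystems react when ACPI tables are loaded at runtime and devices appear or disappear. A hedged sketch of a consumer (the foo_* names are hypothetical); the notifier data is the affected struct acpi_device:

	static int foo_acpi_reconfig(struct notifier_block *nb,
				     unsigned long event, void *arg)
	{
		struct acpi_device *adev = arg;

		if (event == ACPI_RECONFIG_DEVICE_ADD)
			foo_enumerate(adev);	/* hypothetical helper */
		else if (event == ACPI_RECONFIG_DEVICE_REMOVE)
			foo_teardown(adev);	/* hypothetical helper */

		return NOTIFY_OK;
	}

	static struct notifier_block foo_nb = {
		.notifier_call = foo_acpi_reconfig,
	};

	/* registration: acpi_reconfig_notifier_register(&foo_nb); */
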
*/ - register_reboot_notifier(&tts_notifier); + register_reboot_notifier(&sleep_notifier); return 0; } diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 4b3a9e27f1b6..358165e9f5b8 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -378,8 +378,7 @@ static void acpi_table_attr_init(struct acpi_table_attr *table_attr, return; } -static acpi_status -acpi_sysfs_table_handler(u32 event, void *table, void *context) +acpi_status acpi_sysfs_table_handler(u32 event, void *table, void *context) { struct acpi_table_attr *table_attr; @@ -452,9 +451,8 @@ static int acpi_tables_sysfs_init(void) kobject_uevent(tables_kobj, KOBJ_ADD); kobject_uevent(dynamic_tables_kobj, KOBJ_ADD); - status = acpi_install_table_handler(acpi_sysfs_table_handler, NULL); - return ACPI_FAILURE(status) ? -EINVAL : 0; + return 0; err_dynamic_tables: kobject_put(tables_kobj); err: diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index a372f9eaa15d..9f0ad6ebb368 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include "internal.h" #ifdef CONFIG_ACPI_CUSTOM_DSDT @@ -481,8 +483,10 @@ static DECLARE_BITMAP(acpi_initrd_installed, NR_ACPI_INITRD_TABLES); #define MAP_CHUNK_SIZE (NR_FIX_BTMAPS << PAGE_SHIFT) -static void __init acpi_table_initrd_init(void *data, size_t size) +void __init acpi_table_upgrade(void) { + void *data = (void *)initrd_start; + size_t size = initrd_end - initrd_start; int sig, no, table_nr = 0, total_offset = 0; long offset = 0; struct acpi_table_header *table; @@ -540,7 +544,7 @@ static void __init acpi_table_initrd_init(void *data, size_t size) return; acpi_tables_addr = - memblock_find_in_range(0, max_low_pfn_mapped << PAGE_SHIFT, + memblock_find_in_range(0, ACPI_TABLE_UPGRADE_MAX_PHYS, all_tables_size, PAGE_SIZE); if (!acpi_tables_addr) { WARN_ON(1); @@ -578,10 +582,10 @@ static void __init acpi_table_initrd_init(void *data, size_t size) clen = size; if (clen > MAP_CHUNK_SIZE - slop) clen = MAP_CHUNK_SIZE - slop; - dest_p = early_ioremap(dest_addr & PAGE_MASK, - clen + slop); + dest_p = early_memremap(dest_addr & PAGE_MASK, + clen + slop); memcpy(dest_p + slop, src_p, clen); - early_iounmap(dest_p, clen + slop); + early_memunmap(dest_p, clen + slop); src_p += clen; dest_addr += clen; size -= clen; @@ -696,10 +700,6 @@ next_table: } } #else -static void __init acpi_table_initrd_init(void *data, size_t size) -{ -} - static acpi_status acpi_table_initrd_override(struct acpi_table_header *existing_table, acpi_physical_address *address, @@ -742,11 +742,6 @@ acpi_os_table_override(struct acpi_table_header *existing_table, return AE_OK; } -void __init early_acpi_table_init(void *data, size_t size) -{ - acpi_table_initrd_init(data, size); -} - /* * acpi_table_init() * diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 82707f9824ca..f4ebe39539af 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -1259,7 +1259,8 @@ static int __init acpi_thermal_init(void) return -ENODEV; } - acpi_thermal_pm_queue = create_workqueue("acpi_thermal_pm"); + acpi_thermal_pm_queue = alloc_workqueue("acpi_thermal_pm", + WQ_HIGHPRI | WQ_MEM_RECLAIM, 0); if (!acpi_thermal_pm_queue) return -ENODEV; diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 3d1327615f72..a6b36fc53aec 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -167,6 +167,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 
X201s"), }, }, + { + .callback = video_detect_force_video, + .ident = "ThinkPad X201T", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X201T"), + }, + }, /* The native backlight controls do not work on some older machines */ { diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index e2dc4c045146..2c8be74f401d 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -98,12 +98,12 @@ config SATA_AHCI_PLATFORM If unsure, say N. -config AHCI_BRCMSTB - tristate "Broadcom STB AHCI SATA support" - depends on ARCH_BRCMSTB || BMIPS_GENERIC +config AHCI_BRCM + tristate "Broadcom AHCI SATA support" + depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_NSP help This option enables support for the AHCI SATA3 controller found on - STB SoC's. + Broadcom SoC's. If unsure, say N. diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 0b2afb7e5f35..a46e6b784bda 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_SATA_INIC162X) += sata_inic162x.o obj-$(CONFIG_SATA_SIL24) += sata_sil24.o obj-$(CONFIG_SATA_DWC) += sata_dwc_460ex.o obj-$(CONFIG_SATA_HIGHBANK) += sata_highbank.o libahci.o -obj-$(CONFIG_AHCI_BRCMSTB) += ahci_brcmstb.o libahci.o libahci_platform.o +obj-$(CONFIG_AHCI_BRCM) += ahci_brcm.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_CEVA) += ahci_ceva.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_DA850) += ahci_da850.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_IMX) += ahci_imx.o libahci.o libahci_platform.o diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index a83bbcc58b4c..90eabaf81215 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -580,7 +580,7 @@ static struct pci_driver ahci_pci_driver = { }, }; -#if defined(CONFIG_PATA_MARVELL) || defined(CONFIG_PATA_MARVELL_MODULE) +#if IS_ENABLED(CONFIG_PATA_MARVELL) static int marvell_enable; #else static int marvell_enable = 1; diff --git a/drivers/ata/ahci_brcmstb.c b/drivers/ata/ahci_brcm.c similarity index 91% rename from drivers/ata/ahci_brcmstb.c rename to drivers/ata/ahci_brcm.c index e87bcec0fd7c..6f8a7341fa08 100644 --- a/drivers/ata/ahci_brcmstb.c +++ b/drivers/ata/ahci_brcm.c @@ -71,6 +71,12 @@ (DATA_ENDIAN << DMADESC_ENDIAN_SHIFT) | \ (MMIO_ENDIAN << MMIO_ENDIAN_SHIFT)) +enum brcm_ahci_version { + BRCM_SATA_BCM7425 = 1, + BRCM_SATA_BCM7445, + BRCM_SATA_NSP, +}; + enum brcm_ahci_quirks { BRCM_AHCI_QUIRK_NO_NCQ = BIT(0), BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE = BIT(1), @@ -81,6 +87,7 @@ struct brcm_ahci_priv { void __iomem *top_ctrl; u32 port_mask; u32 quirks; + enum brcm_ahci_version version; }; static const struct ata_port_info ahci_brcm_port_info = { @@ -247,9 +254,19 @@ static u32 brcm_ahci_get_portmask(struct platform_device *pdev, static void brcm_sata_init(struct brcm_ahci_priv *priv) { + void __iomem *ctrl = priv->top_ctrl + SATA_TOP_CTRL_BUS_CTRL; + /* Configure endianness */ - brcm_sata_writereg(BUS_CTRL_ENDIAN_CONF, - priv->top_ctrl + SATA_TOP_CTRL_BUS_CTRL); + if (priv->version == BRCM_SATA_NSP) { + u32 data = brcm_sata_readreg(ctrl); + + data &= ~((0x03 << DMADATA_ENDIAN_SHIFT) | + (0x03 << DMADESC_ENDIAN_SHIFT)); + data |= (0x02 << DMADATA_ENDIAN_SHIFT) | + (0x02 << DMADESC_ENDIAN_SHIFT); + brcm_sata_writereg(data, ctrl); + } else + brcm_sata_writereg(BUS_CTRL_ENDIAN_CONF, ctrl); } #ifdef CONFIG_PM_SLEEP @@ -282,8 +299,17 @@ static struct scsi_host_template ahci_platform_sht = { AHCI_SHT(DRV_NAME), }; +static const struct of_device_id ahci_of_match[] = { + {.compatible = "brcm,bcm7425-ahci", .data = (void 
*)BRCM_SATA_BCM7425}, + {.compatible = "brcm,bcm7445-ahci", .data = (void *)BRCM_SATA_BCM7445}, + {.compatible = "brcm,bcm-nsp-ahci", .data = (void *)BRCM_SATA_NSP}, + {}, +}; +MODULE_DEVICE_TABLE(of, ahci_of_match); + static int brcm_ahci_probe(struct platform_device *pdev) { + const struct of_device_id *of_id; struct device *dev = &pdev->dev; struct brcm_ahci_priv *priv; struct ahci_host_priv *hpriv; @@ -293,6 +319,12 @@ static int brcm_ahci_probe(struct platform_device *pdev) priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + + of_id = of_match_node(ahci_of_match, pdev->dev.of_node); + if (!of_id) + return -ENODEV; + + priv->version = (enum brcm_ahci_version)of_id->data; priv->dev = dev; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "top-ctrl"); @@ -300,7 +332,8 @@ static int brcm_ahci_probe(struct platform_device *pdev) if (IS_ERR(priv->top_ctrl)) return PTR_ERR(priv->top_ctrl); - if (of_device_is_compatible(dev->of_node, "brcm,bcm7425-ahci")) { + if ((priv->version == BRCM_SATA_BCM7425) || + (priv->version == BRCM_SATA_NSP)) { priv->quirks |= BRCM_AHCI_QUIRK_NO_NCQ; priv->quirks |= BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE; } @@ -354,13 +387,6 @@ static int brcm_ahci_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id ahci_of_match[] = { - {.compatible = "brcm,bcm7425-ahci"}, - {.compatible = "brcm,bcm7445-ahci"}, - {}, -}; -MODULE_DEVICE_TABLE(of, ahci_of_match); - static SIMPLE_DEV_PM_OPS(ahci_brcm_pm_ops, brcm_ahci_suspend, brcm_ahci_resume); static struct platform_driver brcm_ahci_driver = { diff --git a/drivers/ata/ahci_seattle.c b/drivers/ata/ahci_seattle.c index 6e702ab57220..1d31c0c0fc20 100644 --- a/drivers/ata/ahci_seattle.c +++ b/drivers/ata/ahci_seattle.c @@ -137,7 +137,7 @@ static const struct ata_port_info *ahci_seattle_get_port_info( u32 val; plat_data = devm_kzalloc(dev, sizeof(*plat_data), GFP_KERNEL); - if (IS_ERR(plat_data)) + if (!plat_data) return &ahci_port_info; plat_data->sgpio_ctrl = devm_ioremap_resource(dev, diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 71b07198e207..7461a587b39b 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1975,7 +1975,7 @@ unsigned int ahci_qc_issue(struct ata_queued_cmd *qc) */ pp->active_link = qc->dev->link; - if (qc->tf.protocol == ATA_PROT_NCQ) + if (ata_is_ncq(qc->tf.protocol)) writel(1 << qc->tag, port_mmio + PORT_SCR_ACT); if (pp->fbs_enabled && pp->fbs_last_dev != qc->dev->link->pmp) { @@ -2392,12 +2392,20 @@ static int ahci_port_start(struct ata_port *ap) static void ahci_port_stop(struct ata_port *ap) { const char *emsg = NULL; + struct ahci_host_priv *hpriv = ap->host->private_data; + void __iomem *host_mmio = hpriv->mmio; int rc; /* de-initialize port */ rc = ahci_deinit_port(ap, &emsg); if (rc) ata_port_warn(ap, "%s (%d)\n", emsg, rc); + + /* + * Clear GHC.IS to prevent stuck INTx after disabling MSI and + * re-enabling INTx. 
+ */ + writel(1 << ap->port_no, host_mmio + HOST_IRQ_STAT); } void ahci_print_info(struct ata_host *host, const char *scc_s) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 2eca572f4df6..223a770f78f3 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1239,7 +1239,7 @@ static int ata_read_native_max_address(struct ata_device *dev, u64 *max_sectors) } else tf.command = ATA_CMD_READ_NATIVE_MAX; - tf.protocol |= ATA_PROT_NODATA; + tf.protocol = ATA_PROT_NODATA; tf.device |= ATA_LBA; err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); @@ -1298,7 +1298,7 @@ static int ata_set_max_sectors(struct ata_device *dev, u64 new_sectors) tf.device |= (new_sectors >> 24) & 0xf; } - tf.protocol |= ATA_PROT_NODATA; + tf.protocol = ATA_PROT_NODATA; tf.device |= ATA_LBA; tf.lbal = (new_sectors >> 0) & 0xff; @@ -4315,6 +4315,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { */ { "ST380013AS", "3.20", ATA_HORKAGE_MAX_SEC_1024 }, + /* + * Device times out with higher max sects. + * https://bugzilla.kernel.org/show_bug.cgi?id=121671 + */ + { "LITEON CX1-JB256-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 }, + /* Devices we expect to fail diagnostics */ /* Devices where NCQ should be avoided */ @@ -4843,7 +4849,7 @@ int ata_std_qc_defer(struct ata_queued_cmd *qc) { struct ata_link *link = qc->dev->link; - if (qc->tf.protocol == ATA_PROT_NCQ) { + if (ata_is_ncq(qc->tf.protocol)) { if (!ata_tag_valid(link->active_tag)) return 0; } else { @@ -5008,7 +5014,7 @@ void __ata_qc_complete(struct ata_queued_cmd *qc) ata_sg_clean(qc); /* command should be marked inactive atomically with qc completion */ - if (qc->tf.protocol == ATA_PROT_NCQ) { + if (ata_is_ncq(qc->tf.protocol)) { link->sactive &= ~(1 << qc->tag); if (!link->sactive) ap->nr_active_links--; @@ -5045,7 +5051,7 @@ static void ata_verify_xfer(struct ata_queued_cmd *qc) { struct ata_device *dev = qc->dev; - if (ata_is_nodata(qc->tf.protocol)) + if (!ata_is_data(qc->tf.protocol)) return; if ((dev->mwdma_mask || dev->udma_mask) && ata_is_pio(qc->tf.protocol)) @@ -5131,7 +5137,9 @@ void ata_qc_complete(struct ata_queued_cmd *qc) switch (qc->tf.command) { case ATA_CMD_SET_FEATURES: if (qc->tf.feature != SETFEATURES_WC_ON && - qc->tf.feature != SETFEATURES_WC_OFF) + qc->tf.feature != SETFEATURES_WC_OFF && + qc->tf.feature != SETFEATURES_RA_ON && + qc->tf.feature != SETFEATURES_RA_OFF) break; /* fall through */ case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */ diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 61dc7a99e89a..0e1ec37070d1 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -606,7 +606,7 @@ void ata_scsi_error(struct Scsi_Host *host) ata_scsi_port_error_handler(host, ap); /* finish or retry handled scmd's and clean up */ - WARN_ON(host->host_failed || !list_empty(&eh_work_q)); + WARN_ON(!list_empty(&eh_work_q)); DPRINTK("EXIT\n"); } @@ -2607,9 +2607,13 @@ static void ata_eh_link_report(struct ata_link *link) [DMA_FROM_DEVICE] = "in", }; static const char *prot_str[] = { + [ATA_PROT_UNKNOWN] = "unknown", + [ATA_PROT_NODATA] = "nodata", [ATA_PROT_PIO] = "pio", [ATA_PROT_DMA] = "dma", - [ATA_PROT_NCQ] = "ncq", + [ATA_PROT_NCQ] = "ncq dma", + [ATA_PROT_NCQ_NODATA] = "ncq nodata", + [ATAPI_PROT_NODATA] = "nodata", [ATAPI_PROT_PIO] = "pio", [ATAPI_PROT_DMA] = "dma", }; @@ -3177,7 +3181,7 @@ static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) } tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; - tf.protocol |= ATA_PROT_NODATA; 
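The "|=" fixes here and the ata_is_ncq() conversions above share one root cause: a taskfile's protocol field holds a single enum value, not a bit mask, so OR-ing a protocol into it can produce a hybrid value, and comparing against ATA_PROT_NCQ alone misses the new ATA_PROT_NCQ_NODATA. A minimal sketch of flag-based classification, loosely modelled on the ata_is_*() helpers; the flag values below are illustrative, not the kernel's actual definitions:

#include <linux/ata.h>
#include <linux/types.h>

/* Illustrative flags only; the real helpers live in include/linux/ata.h. */
enum sketch_prot_flags {
	SKETCH_PROT_DATA = 1 << 0,
	SKETCH_PROT_NCQ  = 1 << 1,
};

static unsigned int sketch_prot_flags(u8 prot)
{
	switch (prot) {
	case ATA_PROT_NODATA:
		return 0;
	case ATA_PROT_PIO:
	case ATA_PROT_DMA:
		return SKETCH_PROT_DATA;
	case ATA_PROT_NCQ:
		return SKETCH_PROT_DATA | SKETCH_PROT_NCQ;
	case ATA_PROT_NCQ_NODATA:
		return SKETCH_PROT_NCQ;
	}
	return 0;
}

static bool sketch_is_ncq(u8 prot)
{
	/* True for both NCQ variants, unlike "prot == ATA_PROT_NCQ". */
	return sketch_prot_flags(prot) & SKETCH_PROT_NCQ;
}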
+ tf.protocol = ATA_PROT_NODATA; err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); if (park && (err_mask || tf.lbal != 0xc4)) { ata_dev_err(dev, "head unload failed!\n"); diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index bfec66fb26e2..e207b33e4ce9 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -304,7 +304,7 @@ static void ata_scsi_set_invalid_field(struct ata_device *dev, struct scsi_cmnd *cmd, u16 field, u8 bit) { ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x24, 0x0); - /* "Invalid field in cbd" */ + /* "Invalid field in CDB" */ scsi_set_sense_field_pointer(cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, field, bit, 1); } @@ -1190,7 +1190,7 @@ static int atapi_drain_needed(struct request *rq) if (likely(rq->cmd_type != REQ_TYPE_BLOCK_PC)) return 0; - if (!blk_rq_bytes(rq) || (rq->cmd_flags & REQ_WRITE)) + if (!blk_rq_bytes(rq) || op_is_write(req_op(rq))) return 0; return atapi_cmd_type(rq->cmd[0]) == ATAPI_MISC; @@ -2075,8 +2075,8 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) 0x03, 0x20, /* SBC-2 (no version claimed) */ - 0x02, - 0x60 /* SPC-3 (no version claimed) */ + 0x03, + 0x00 /* SPC-3 (no version claimed) */ }; const u8 versions_zbc[] = { 0x00, @@ -2097,7 +2097,10 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) 0, 0x5, /* claim SPC-3 version compatibility */ 2, - 95 - 4 + 95 - 4, + 0, + 0, + 2 }; VPRINTK("ENTER\n"); @@ -2109,8 +2112,10 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) (args->dev->link->ap->pflags & ATA_PFLAG_EXTERNAL)) hdr[1] |= (1 << 7); - if (args->dev->class == ATA_DEV_ZAC) + if (args->dev->class == ATA_DEV_ZAC) { hdr[0] = TYPE_ZBC; + hdr[2] = 0x7; /* claim SPC-5 version compatibility */ + } memcpy(rbuf, hdr, sizeof(hdr)); memcpy(&rbuf[8], "ATA ", 8); @@ -2314,7 +2319,7 @@ static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf) * with the unmap bit set. */ if (ata_id_has_trim(args->id)) { - put_unaligned_be64(65535 * 512 / 8, &rbuf[36]); + put_unaligned_be64(65535 * ATA_MAX_TRIM_RNUM, &rbuf[36]); put_unaligned_be32(1, &rbuf[28]); } @@ -2424,15 +2429,17 @@ static void modecpy(u8 *dest, const u8 *src, int n, bool changeable) static unsigned int ata_msense_caching(u16 *id, u8 *buf, bool changeable) { modecpy(buf, def_cache_mpage, sizeof(def_cache_mpage), changeable); - if (changeable || ata_id_wcache_enabled(id)) - buf[2] |= (1 << 2); /* write cache enable */ - if (!changeable && !ata_id_rahead_enabled(id)) - buf[12] |= (1 << 5); /* disable read ahead */ + if (changeable) { + buf[2] |= (1 << 2); /* ata_mselect_caching() */ + } else { + buf[2] |= (ata_id_wcache_enabled(id) << 2); /* write cache enable */ + buf[12] |= (!ata_id_rahead_enabled(id) << 5); /* disable read ahead */ + } return sizeof(def_cache_mpage); } /** - * ata_msense_ctl_mode - Simulate MODE SENSE control mode page + * ata_msense_control - Simulate MODE SENSE control mode page * @dev: ATA device of interest * @buf: output buffer * @changeable: whether changeable parameters are requested @@ -2442,12 +2449,17 @@ static unsigned int ata_msense_caching(u16 *id, u8 *buf, bool changeable) * LOCKING: * None. 
*/ -static unsigned int ata_msense_ctl_mode(struct ata_device *dev, u8 *buf, +static unsigned int ata_msense_control(struct ata_device *dev, u8 *buf, bool changeable) { modecpy(buf, def_control_mpage, sizeof(def_control_mpage), changeable); - if (changeable && (dev->flags & ATA_DFLAG_D_SENSE)) - buf[2] |= (1 << 2); /* Descriptor sense requested */ + if (changeable) { + buf[2] |= (1 << 2); /* ata_mselect_control() */ + } else { + bool d_sense = (dev->flags & ATA_DFLAG_D_SENSE); + + buf[2] |= (d_sense << 2); /* descriptor format sense data */ + } return sizeof(def_control_mpage); } @@ -2566,13 +2578,13 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf) break; case CONTROL_MPAGE: - p += ata_msense_ctl_mode(args->dev, p, page_control == 1); + p += ata_msense_control(args->dev, p, page_control == 1); break; case ALL_MPAGES: p += ata_msense_rw_recovery(p, page_control == 1); p += ata_msense_caching(args->id, p, page_control == 1); - p += ata_msense_ctl_mode(args->dev, p, page_control == 1); + p += ata_msense_control(args->dev, p, page_control == 1); break; default: /* invalid page code */ @@ -3077,6 +3089,9 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) goto invalid_fld; } + if (ata_is_ncq(tf->protocol) && (cdb[2] & 0x3) == 0) + tf->protocol = ATA_PROT_NCQ_NODATA; + /* enable LBA */ tf->flags |= ATA_TFLAG_LBA; @@ -3125,8 +3140,8 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) tf->command = cdb[9]; } - /* For NCQ commands with FPDMA protocol, copy the tag value */ - if (tf->protocol == ATA_PROT_NCQ) + /* For NCQ commands copy the tag value */ + if (ata_is_ncq(tf->protocol)) tf->nsect = qc->tag << 3; /* enforce correct master/slave bit */ @@ -3305,7 +3320,13 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc) goto invalid_param_len; buf = page_address(sg_page(scsi_sglist(scmd))); - size = ata_set_lba_range_entries(buf, 512, block, n_block); + + if (n_block <= 65535 * ATA_MAX_TRIM_RNUM) { + size = ata_set_lba_range_entries(buf, ATA_MAX_TRIM_RNUM, block, n_block); + } else { + fp = 2; + goto invalid_fld; + } if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) { /* Newer devices support queued TRIM commands */ @@ -3454,7 +3475,7 @@ static unsigned int ata_scsi_zbc_in_xlat(struct ata_queued_cmd *qc) goto invalid_param_len; } sect = n_block / 512; - options = cdb[14]; + options = cdb[14] & 0xbf; if (ata_ncq_enabled(qc->dev) && ata_fpdma_zac_mgmt_in_supported(qc->dev)) { @@ -3464,7 +3485,7 @@ static unsigned int ata_scsi_zbc_in_xlat(struct ata_queued_cmd *qc) tf->nsect = qc->tag << 3; tf->feature = sect & 0xff; tf->hob_feature = (sect >> 8) & 0xff; - tf->auxiliary = ATA_SUBCMD_ZAC_MGMT_IN_REPORT_ZONES; + tf->auxiliary = ATA_SUBCMD_ZAC_MGMT_IN_REPORT_ZONES | (options << 8); } else { tf->command = ATA_CMD_ZAC_MGMT_IN; tf->feature = ATA_SUBCMD_ZAC_MGMT_IN_REPORT_ZONES; @@ -3506,7 +3527,7 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc) struct scsi_cmnd *scmd = qc->scsicmd; struct ata_device *dev = qc->dev; const u8 *cdb = scmd->cmnd; - u8 reset_all, sa; + u8 all, sa; u64 block; u32 n_block; u16 fp = (u16)-1; @@ -3533,20 +3554,20 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc) if (block > dev->n_sectors) goto out_of_range; - reset_all = cdb[14] & 0x1; + all = cdb[14] & 0x1; if (ata_ncq_enabled(qc->dev) && ata_fpdma_zac_mgmt_out_supported(qc->dev)) { - tf->protocol = ATA_PROT_NCQ; + tf->protocol = ATA_PROT_NCQ_NODATA; tf->command = ATA_CMD_NCQ_NON_DATA; - 
tf->hob_nsect = ATA_SUBCMD_NCQ_NON_DATA_ZAC_MGMT_OUT; + tf->feature = ATA_SUBCMD_NCQ_NON_DATA_ZAC_MGMT_OUT; tf->nsect = qc->tag << 3; - tf->auxiliary = sa | (reset_all & 0x1) << 8; + tf->auxiliary = sa | ((u16)all << 8); } else { tf->protocol = ATA_PROT_NODATA; tf->command = ATA_CMD_ZAC_MGMT_OUT; tf->feature = sa; - tf->hob_feature = reset_all & 0x1; + tf->hob_feature = all; } tf->lbah = (block >> 16) & 0xff; tf->lbam = (block >> 8) & 0xff; @@ -3667,7 +3688,7 @@ static int ata_mselect_control(struct ata_queued_cmd *qc, /* * Check that read-only bits are not modified. */ - ata_msense_ctl_mode(dev, mpage, false); + ata_msense_control(dev, mpage, false); for (i = 0; i < CONTROL_MPAGE_LEN - 2; i++) { if (i == 0) continue; @@ -4039,11 +4060,6 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) args.done = cmd->scsi_done; switch(scsicmd[0]) { - /* TODO: worth improving? */ - case FORMAT_UNIT: - ata_scsi_invalid_field(dev, cmd, 0); - break; - case INQUIRY: if (scsicmd[1] & 2) /* is CmdDt set? */ ata_scsi_invalid_field(dev, cmd, 1); diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index e2d94972962d..7ef16c085058 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -495,12 +495,13 @@ struct ata_show_ering_arg { static int ata_show_ering(struct ata_ering_entry *ent, void *void_arg) { struct ata_show_ering_arg* arg = void_arg; - struct timespec time; + u64 seconds; + u32 rem; - jiffies_to_timespec(ent->timestamp,&time); + seconds = div_u64_rem(ent->timestamp, HZ, &rem); arg->written += sprintf(arg->buf + arg->written, - "[%5lu.%06lu]", - time.tv_sec, time.tv_nsec); + "[%5llu.%09lu]", seconds, + rem * NSEC_PER_SEC / HZ); arg->written += get_ata_err_names(ent->err_mask, arg->buf + arg->written); return 0; diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c index 80fe0f6fed29..b4d54771c9fe 100644 --- a/drivers/ata/pata_arasan_cf.c +++ b/drivers/ata/pata_arasan_cf.c @@ -565,7 +565,7 @@ chan_request_fail: qc->ap->hsm_task_state = HSM_ST_ERR; cf_ctrl_reset(acdev); - spin_unlock_irqrestore(qc->ap->lock, flags); + spin_unlock_irqrestore(&acdev->host->lock, flags); sff_intr: dma_complete(acdev); } diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c index 970f7767e5fd..49d705c9f0f7 100644 --- a/drivers/ata/pata_atiixp.c +++ b/drivers/ata/pata_atiixp.c @@ -183,8 +183,8 @@ static void atiixp_set_dmamode(struct ata_port *ap, struct ata_device *adev) * We must now look at the PIO mode situation. 
We may need to * adjust the PIO mode to keep the timings acceptable */ - if (adev->dma_mode >= XFER_MW_DMA_2) - wanted_pio = 4; + if (adev->dma_mode >= XFER_MW_DMA_2) + wanted_pio = 4; else if (adev->dma_mode == XFER_MW_DMA_1) wanted_pio = 3; else if (adev->dma_mode == XFER_MW_DMA_0) diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c index e5fb7525a5df..a219a503c229 100644 --- a/drivers/ata/pata_hpt366.c +++ b/drivers/ata/pata_hpt366.c @@ -368,7 +368,7 @@ static int hpt36x_init_one(struct pci_dev *dev, const struct pci_device_id *id) /* PCI clocking determines the ATA timing values to use */ /* info_hpt366 is safe against re-entry so we can scribble on it */ - switch ((reg1 & 0x700) >> 8) { + switch ((reg1 & 0xf00) >> 8) { case 9: hpriv = &hpt366_40; break; diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c index ae9feb1ba8f7..ff468a6fd8dd 100644 --- a/drivers/ata/pata_marvell.c +++ b/drivers/ata/pata_marvell.c @@ -146,7 +146,7 @@ static int marvell_init_one (struct pci_dev *pdev, const struct pci_device_id *i if (pdev->device == 0x6101) ppi[1] = &ata_dummy_port_info; -#if defined(CONFIG_SATA_AHCI) || defined(CONFIG_SATA_AHCI_MODULE) +#if IS_ENABLED(CONFIG_SATA_AHCI) if (!marvell_pata_active(pdev)) { printk(KERN_INFO DRV_NAME ": PATA port not active, deferring to AHCI driver.\n"); return -ENODEV; diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 00c2af1d211b..e0939bd5ea73 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -259,11 +259,8 @@ static int sata_dwc_dma_init_old(struct platform_device *pdev, /* Get physical SATA DMA register base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 1); hsdev->dma->regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(hsdev->dma->regs)) { - dev_err(&pdev->dev, - "ioremap failed for AHBDMA register address\n"); + if (IS_ERR(hsdev->dma->regs)) return PTR_ERR(hsdev->dma->regs); - } /* Initialize AHB DMAC */ return dw_dma_probe(hsdev->dma); @@ -281,7 +278,7 @@ static void sata_dwc_dma_exit_old(struct sata_dwc_device *hsdev) static const char *get_prot_descript(u8 protocol) { - switch ((enum ata_tf_protocols)protocol) { + switch (protocol) { case ATA_PROT_NODATA: return "ATA no data"; case ATA_PROT_PIO: @@ -290,6 +287,8 @@ static const char *get_prot_descript(u8 protocol) return "ATA DMA"; case ATA_PROT_NCQ: return "ATA NCQ"; + case ATA_PROT_NCQ_NODATA: + return "ATA NCQ no data"; case ATAPI_PROT_NODATA: return "ATAPI no data"; case ATAPI_PROT_PIO: @@ -1225,11 +1224,8 @@ static int sata_dwc_probe(struct platform_device *ofdev) /* Ioremap SATA registers */ res = platform_get_resource(ofdev, IORESOURCE_MEM, 0); base = devm_ioremap_resource(&ofdev->dev, res); - if (IS_ERR(base)) { - dev_err(&ofdev->dev, - "ioremap failed for SATA register address\n"); + if (IS_ERR(base)) return PTR_ERR(base); - } dev_dbg(&ofdev->dev, "ioremap done for SATA register address\n"); /* Synopsys DWC SATA specific Registers */ diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index bd74ee555278..745489a1c86a 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -986,7 +986,7 @@ static inline void mv_write_cached_reg(void __iomem *addr, u32 *old, u32 new) * Looks like a lot of fuss, but it avoids an unnecessary * +1 usec read-after-write delay for unaffected registers. 
*/ - laddr = (long)addr & 0xffff; + laddr = (unsigned long)addr & 0xffff; if (laddr >= 0x300 && laddr <= 0x33c) { laddr &= 0x000f; if (laddr == 0x4 || laddr == 0xc) { diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 527bbd595e37..5fc81e240c24 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2795,9 +2795,7 @@ static int hrz_probe(struct pci_dev *pci_dev, dev->atm_dev->ci_range.vpi_bits = vpi_bits; dev->atm_dev->ci_range.vci_bits = 10-vpi_bits; - init_timer(&dev->housekeeping); - dev->housekeeping.function = do_housekeeping; - dev->housekeeping.data = (unsigned long) dev; + setup_timer(&dev->housekeeping, do_housekeeping, (unsigned long) dev); mod_timer(&dev->housekeeping, jiffies); out: diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index ddc4ceb85fc5..700ed15c2362 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -874,7 +874,8 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd) scq->skb = kmalloc(sizeof(struct sk_buff *) * (size / NS_SCQE_SIZE), GFP_KERNEL); if (!scq->skb) { - kfree(scq->org); + dma_free_coherent(&card->pcidev->dev, + 2 * size, scq->org, scq->dma); kfree(scq); return NULL; } diff --git a/drivers/base/memory.c b/drivers/base/memory.c index f46dba8b7092..dc75de9059cd 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -391,6 +391,7 @@ static ssize_t show_valid_zones(struct device *dev, unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; struct page *first_page; struct zone *zone; + int zone_shift = 0; start_pfn = section_nr_to_pfn(mem->start_section_nr); end_pfn = start_pfn + nr_pages; @@ -402,21 +403,26 @@ static ssize_t show_valid_zones(struct device *dev, zone = page_zone(first_page); - if (zone_idx(zone) == ZONE_MOVABLE - 1) { - /*The mem block is the last memoryblock of this zone.*/ - if (end_pfn == zone_end_pfn(zone)) - return sprintf(buf, "%s %s\n", - zone->name, (zone + 1)->name); + /* MMOP_ONLINE_KEEP */ + sprintf(buf, "%s", zone->name); + + /* MMOP_ONLINE_KERNEL */ + zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL); + if (zone_shift) { + strcat(buf, " "); + strcat(buf, (zone + zone_shift)->name); } - if (zone_idx(zone) == ZONE_MOVABLE) { - /*The mem block is the first memoryblock of ZONE_MOVABLE.*/ - if (start_pfn == zone->zone_start_pfn) - return sprintf(buf, "%s %s\n", - zone->name, (zone - 1)->name); + /* MMOP_ONLINE_MOVABLE */ + zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE); + if (zone_shift) { + strcat(buf, " "); + strcat(buf, (zone + zone_shift)->name); } - return sprintf(buf, "%s\n", zone->name); + strcat(buf, "\n"); + + return strlen(buf); } static DEVICE_ATTR(valid_zones, 0444, show_valid_zones, NULL); #endif diff --git a/drivers/base/node.c b/drivers/base/node.c index 560751bad294..51c7db2c4ee2 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -113,6 +113,8 @@ static ssize_t node_read_meminfo(struct device *dev, "Node %d SUnreclaim: %8lu kB\n" #ifdef CONFIG_TRANSPARENT_HUGEPAGE "Node %d AnonHugePages: %8lu kB\n" + "Node %d ShmemHugePages: %8lu kB\n" + "Node %d ShmemPmdMapped: %8lu kB\n" #endif , nid, K(node_page_state(nid, NR_FILE_DIRTY)), @@ -131,10 +133,13 @@ static ssize_t node_read_meminfo(struct device *dev, node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)), #ifdef CONFIG_TRANSPARENT_HUGEPAGE - nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)) - , nid, - K(node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) * - HPAGE_PMD_NR)); + nid, K(node_page_state(nid, 
NR_SLAB_UNRECLAIMABLE)), + nid, K(node_page_state(nid, NR_ANON_THPS) * + HPAGE_PMD_NR), + nid, K(node_page_state(nid, NR_SHMEM_THPS) * + HPAGE_PMD_NR), + nid, K(node_page_state(nid, NR_SHMEM_PMDMAPPED) * + HPAGE_PMD_NR)); #else nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE))); #endif diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 3657ac1cb801..8e2e4757adcb 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -121,6 +121,7 @@ int pm_clk_add(struct device *dev, const char *con_id) { return __pm_clk_add(dev, con_id, NULL); } +EXPORT_SYMBOL_GPL(pm_clk_add); /** * pm_clk_add_clk - Start using a device clock for power management. @@ -136,8 +137,41 @@ int pm_clk_add_clk(struct device *dev, struct clk *clk) { return __pm_clk_add(dev, NULL, clk); } +EXPORT_SYMBOL_GPL(pm_clk_add_clk); +/** + * of_pm_clk_add_clk - Start using a device clock for power management. + * @dev: Device whose clock is going to be used for power management. + * @name: Name of clock that is going to be used for power management. + * + * Add the clock described in the 'clocks' device-tree node that matches + * with the 'name' provided, to the list of clocks used for the power + * management of @dev. On success, returns 0. Returns a negative error + * code if the clock is not found or cannot be added. + */ +int of_pm_clk_add_clk(struct device *dev, const char *name) +{ + struct clk *clk; + int ret; + + if (!dev || !dev->of_node || !name) + return -EINVAL; + + clk = of_clk_get_by_name(dev->of_node, name); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + ret = pm_clk_add_clk(dev, clk); + if (ret) { + clk_put(clk); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(of_pm_clk_add_clk); + /** * of_pm_clk_add_clks - Start using device clock(s) for power management. * @dev: Device whose clock(s) is going to be used for power management. @@ -192,6 +226,7 @@ error: return ret; } +EXPORT_SYMBOL_GPL(of_pm_clk_add_clks); /** * __pm_clk_remove - Destroy PM clock entry. @@ -252,6 +287,7 @@ void pm_clk_remove(struct device *dev, const char *con_id) __pm_clk_remove(ce); } +EXPORT_SYMBOL_GPL(pm_clk_remove); /** * pm_clk_remove_clk - Stop using a device clock for power management. @@ -285,6 +321,7 @@ void pm_clk_remove_clk(struct device *dev, struct clk *clk) __pm_clk_remove(ce); } +EXPORT_SYMBOL_GPL(pm_clk_remove_clk); /** * pm_clk_init - Initialize a device's list of power management clocks. @@ -299,6 +336,7 @@ void pm_clk_init(struct device *dev) if (psd) INIT_LIST_HEAD(&psd->clock_list); } +EXPORT_SYMBOL_GPL(pm_clk_init); /** * pm_clk_create - Create and initialize a device's list of PM clocks. @@ -311,6 +349,7 @@ int pm_clk_create(struct device *dev) { return dev_pm_get_subsys_data(dev); } +EXPORT_SYMBOL_GPL(pm_clk_create); /** * pm_clk_destroy - Destroy a device's list of power management clocks. @@ -345,6 +384,7 @@ void pm_clk_destroy(struct device *dev) __pm_clk_remove(ce); } } +EXPORT_SYMBOL_GPL(pm_clk_destroy); /** * pm_clk_suspend - Disable clocks in a device's PM clock list. @@ -375,6 +415,7 @@ int pm_clk_suspend(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(pm_clk_suspend); /** * pm_clk_resume - Enable clocks in a device's PM clock list. @@ -400,6 +441,7 @@ int pm_clk_resume(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(pm_clk_resume); /** * pm_clk_notify - Notify routine for device addition and removal. 
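The EXPORT_SYMBOL_GPL additions and the new of_pm_clk_add_clk() let modular platform drivers manage their clocks through the PM clock list instead of open-coding clk handling. A minimal probe sketch, assuming a hypothetical driver whose device-tree node names a "bus" clock (driver and clock names are illustrative only):

#include <linux/platform_device.h>
#include <linux/pm_clock.h>

static int foo_probe(struct platform_device *pdev)
{
	int ret;

	/* Allocate the device's PM clock list. */
	ret = pm_clk_create(&pdev->dev);
	if (ret)
		return ret;

	/* Looks up clock-names = "bus" in the device's DT node. */
	ret = of_pm_clk_add_clk(&pdev->dev, "bus");
	if (ret) {
		pm_clk_destroy(&pdev->dev);
		return ret;
	}

	/* pm_clk_suspend()/pm_clk_resume() now gate this clock for us. */
	return 0;
}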
@@ -480,6 +522,7 @@ int pm_clk_runtime_suspend(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(pm_clk_runtime_suspend); int pm_clk_runtime_resume(struct device *dev) { @@ -495,6 +538,7 @@ int pm_clk_runtime_resume(struct device *dev) return pm_generic_runtime_resume(dev); } +EXPORT_SYMBOL_GPL(pm_clk_runtime_resume); #else /* !CONFIG_PM_CLK */ @@ -598,3 +642,4 @@ void pm_clk_add_notifier(struct bus_type *bus, clknb->nb.notifier_call = pm_clk_notify; bus_register_notifier(bus, &clknb->nb); } +EXPORT_SYMBOL_GPL(pm_clk_add_notifier); diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index de23b648fce3..a1f2aff33997 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -187,8 +187,7 @@ static int genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth) struct gpd_link *link; int ret = 0; - if (genpd->status == GPD_STATE_ACTIVE - || (genpd->prepared_count > 0 && genpd->suspend_power_off)) + if (genpd->status == GPD_STATE_ACTIVE) return 0; /* @@ -735,81 +734,23 @@ static int pm_genpd_prepare(struct device *dev) mutex_lock(&genpd->lock); - if (genpd->prepared_count++ == 0) { + if (genpd->prepared_count++ == 0) genpd->suspended_count = 0; - genpd->suspend_power_off = genpd->status == GPD_STATE_POWER_OFF; - } mutex_unlock(&genpd->lock); - if (genpd->suspend_power_off) - return 0; - - /* - * The PM domain must be in the GPD_STATE_ACTIVE state at this point, - * so genpd_poweron() will return immediately, but if the device - * is suspended (e.g. it's been stopped by genpd_stop_dev()), we need - * to make it operational. - */ - pm_runtime_resume(dev); - __pm_runtime_disable(dev, false); - ret = pm_generic_prepare(dev); if (ret) { mutex_lock(&genpd->lock); - if (--genpd->prepared_count == 0) - genpd->suspend_power_off = false; + genpd->prepared_count--; mutex_unlock(&genpd->lock); - pm_runtime_enable(dev); } return ret; } -/** - * pm_genpd_suspend - Suspend a device belonging to an I/O PM domain. - * @dev: Device to suspend. - * - * Suspend a device under the assumption that its pm_domain field points to the - * domain member of an object of type struct generic_pm_domain representing - * a PM domain consisting of I/O devices. - */ -static int pm_genpd_suspend(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_suspend(dev); -} - -/** - * pm_genpd_suspend_late - Late suspend of a device from an I/O PM domain. - * @dev: Device to suspend. - * - * Carry out a late suspend of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a PM domain consisting of I/O devices. - */ -static int pm_genpd_suspend_late(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_suspend_late(dev); -} - /** * pm_genpd_suspend_noirq - Completion of suspend of device in an I/O PM domain. * @dev: Device to suspend. 
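With suspend_power_off gone, the genpd noirq callbacks follow a single pattern: leave devices on an active wakeup path alone, otherwise defer to pm_runtime_force_suspend()/pm_runtime_force_resume() whenever the domain supplies both the stop and start device operations. Those operations are typically wired up by GENPD_FLAG_PM_CLK, as the pm_genpd_init() hunk further down shows. A sketch of a hypothetical domain provider relying on that, also picking up the new int return of pm_genpd_init():

#include <linux/pm_domain.h>

/* Hypothetical SoC domain; only the flag and the return check matter here. */
static struct generic_pm_domain foo_pd = {
	.name  = "foo-pd",
	.flags = GENPD_FLAG_PM_CLK,	/* wires dev_ops.stop/start to pm_clk_* */
};

static int foo_pd_setup(void)
{
	/* pm_genpd_init() now reports failure instead of returning void. */
	return pm_genpd_init(&foo_pd, NULL, true /* start powered off */);
}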
@@ -820,6 +761,7 @@ static int pm_genpd_suspend_late(struct device *dev) static int pm_genpd_suspend_noirq(struct device *dev) { struct generic_pm_domain *genpd; + int ret; dev_dbg(dev, "%s()\n", __func__); @@ -827,11 +769,14 @@ static int pm_genpd_suspend_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - if (genpd->suspend_power_off - || (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))) + if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)) return 0; - genpd_stop_dev(genpd, dev); + if (genpd->dev_ops.stop && genpd->dev_ops.start) { + ret = pm_runtime_force_suspend(dev); + if (ret) + return ret; + } /* * Since all of the "noirq" callbacks are executed sequentially, it is @@ -853,6 +798,7 @@ static int pm_genpd_suspend_noirq(struct device *dev) static int pm_genpd_resume_noirq(struct device *dev) { struct generic_pm_domain *genpd; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); @@ -860,8 +806,7 @@ static int pm_genpd_resume_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - if (genpd->suspend_power_off - || (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))) + if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)) return 0; /* @@ -872,93 +817,10 @@ static int pm_genpd_resume_noirq(struct device *dev) pm_genpd_sync_poweron(genpd, true); genpd->suspended_count--; - return genpd_start_dev(genpd, dev); -} - -/** - * pm_genpd_resume_early - Early resume of a device in an I/O PM domain. - * @dev: Device to resume. - * - * Carry out an early resume of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a power domain consisting of I/O - * devices. - */ -static int pm_genpd_resume_early(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_resume_early(dev); -} - -/** - * pm_genpd_resume - Resume of device in an I/O PM domain. - * @dev: Device to resume. - * - * Resume a device under the assumption that its pm_domain field points to the - * domain member of an object of type struct generic_pm_domain representing - * a power domain consisting of I/O devices. - */ -static int pm_genpd_resume(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_resume(dev); -} - -/** - * pm_genpd_freeze - Freezing a device in an I/O PM domain. - * @dev: Device to freeze. - * - * Freeze a device under the assumption that its pm_domain field points to the - * domain member of an object of type struct generic_pm_domain representing - * a power domain consisting of I/O devices. - */ -static int pm_genpd_freeze(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_freeze(dev); -} + if (genpd->dev_ops.stop && genpd->dev_ops.start) + ret = pm_runtime_force_resume(dev); -/** - * pm_genpd_freeze_late - Late freeze of a device in an I/O PM domain. - * @dev: Device to freeze. 
- * - * Carry out a late freeze of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a power domain consisting of I/O - * devices. - */ -static int pm_genpd_freeze_late(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_freeze_late(dev); + return ret; } /** @@ -973,6 +835,7 @@ static int pm_genpd_freeze_late(struct device *dev) static int pm_genpd_freeze_noirq(struct device *dev) { struct generic_pm_domain *genpd; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); @@ -980,7 +843,10 @@ static int pm_genpd_freeze_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_stop_dev(genpd, dev); + if (genpd->dev_ops.stop && genpd->dev_ops.start) + ret = pm_runtime_force_suspend(dev); + + return ret; } /** @@ -993,6 +859,7 @@ static int pm_genpd_freeze_noirq(struct device *dev) static int pm_genpd_thaw_noirq(struct device *dev) { struct generic_pm_domain *genpd; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); @@ -1000,51 +867,10 @@ static int pm_genpd_thaw_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? - 0 : genpd_start_dev(genpd, dev); -} - -/** - * pm_genpd_thaw_early - Early thaw of device in an I/O PM domain. - * @dev: Device to thaw. - * - * Carry out an early thaw of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a power domain consisting of I/O - * devices. - */ -static int pm_genpd_thaw_early(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_thaw_early(dev); -} - -/** - * pm_genpd_thaw - Thaw a device belonging to an I/O power domain. - * @dev: Device to thaw. - * - * Thaw a device under the assumption that its pm_domain field points to the - * domain member of an object of type struct generic_pm_domain representing - * a power domain consisting of I/O devices. - */ -static int pm_genpd_thaw(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); + if (genpd->dev_ops.stop && genpd->dev_ops.start) + ret = pm_runtime_force_resume(dev); - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_thaw(dev); + return ret; } /** @@ -1057,6 +883,7 @@ static int pm_genpd_thaw(struct device *dev) static int pm_genpd_restore_noirq(struct device *dev) { struct generic_pm_domain *genpd; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); @@ -1072,30 +899,20 @@ static int pm_genpd_restore_noirq(struct device *dev) * At this point suspended_count == 0 means we are being run for the * first time for the given domain in the present cycle. */ - if (genpd->suspended_count++ == 0) { + if (genpd->suspended_count++ == 0) /* * The boot kernel might put the domain into arbitrary state, * so make it appear as powered off to pm_genpd_sync_poweron(), * so that it tries to power it on in case it was really off. */ genpd->status = GPD_STATE_POWER_OFF; - if (genpd->suspend_power_off) { - /* - * If the domain was off before the hibernation, make - * sure it will be off going forward. 
- */ - genpd_power_off(genpd, true); - - return 0; - } - } - - if (genpd->suspend_power_off) - return 0; pm_genpd_sync_poweron(genpd, true); - return genpd_start_dev(genpd, dev); + if (genpd->dev_ops.stop && genpd->dev_ops.start) + ret = pm_runtime_force_resume(dev); + + return ret; } /** @@ -1110,7 +927,6 @@ static int pm_genpd_restore_noirq(struct device *dev) static void pm_genpd_complete(struct device *dev) { struct generic_pm_domain *genpd; - bool run_complete; dev_dbg(dev, "%s()\n", __func__); @@ -1118,20 +934,15 @@ static void pm_genpd_complete(struct device *dev) if (IS_ERR(genpd)) return; + pm_generic_complete(dev); + mutex_lock(&genpd->lock); - run_complete = !genpd->suspend_power_off; - if (--genpd->prepared_count == 0) - genpd->suspend_power_off = false; + genpd->prepared_count--; + if (!genpd->prepared_count) + genpd_queue_power_off_work(genpd); mutex_unlock(&genpd->lock); - - if (run_complete) { - pm_generic_complete(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - pm_request_idle(dev); - } } /** @@ -1173,18 +984,10 @@ EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron); #else /* !CONFIG_PM_SLEEP */ #define pm_genpd_prepare NULL -#define pm_genpd_suspend NULL -#define pm_genpd_suspend_late NULL #define pm_genpd_suspend_noirq NULL -#define pm_genpd_resume_early NULL #define pm_genpd_resume_noirq NULL -#define pm_genpd_resume NULL -#define pm_genpd_freeze NULL -#define pm_genpd_freeze_late NULL #define pm_genpd_freeze_noirq NULL -#define pm_genpd_thaw_early NULL #define pm_genpd_thaw_noirq NULL -#define pm_genpd_thaw NULL #define pm_genpd_restore_noirq NULL #define pm_genpd_complete NULL @@ -1455,12 +1258,14 @@ EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); * @genpd: PM domain object to initialize. * @gov: PM domain governor to associate with the domain (may be NULL). * @is_off: Initial value of the domain's power_is_off field. + * + * Returns 0 on successful initialization, else a negative error code. 
*/ -void pm_genpd_init(struct generic_pm_domain *genpd, - struct dev_power_governor *gov, bool is_off) +int pm_genpd_init(struct generic_pm_domain *genpd, + struct dev_power_governor *gov, bool is_off) { if (IS_ERR_OR_NULL(genpd)) - return; + return -EINVAL; INIT_LIST_HEAD(&genpd->master_links); INIT_LIST_HEAD(&genpd->slave_links); @@ -1476,24 +1281,24 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.runtime_suspend = genpd_runtime_suspend; genpd->domain.ops.runtime_resume = genpd_runtime_resume; genpd->domain.ops.prepare = pm_genpd_prepare; - genpd->domain.ops.suspend = pm_genpd_suspend; - genpd->domain.ops.suspend_late = pm_genpd_suspend_late; + genpd->domain.ops.suspend = pm_generic_suspend; + genpd->domain.ops.suspend_late = pm_generic_suspend_late; genpd->domain.ops.suspend_noirq = pm_genpd_suspend_noirq; genpd->domain.ops.resume_noirq = pm_genpd_resume_noirq; - genpd->domain.ops.resume_early = pm_genpd_resume_early; - genpd->domain.ops.resume = pm_genpd_resume; - genpd->domain.ops.freeze = pm_genpd_freeze; - genpd->domain.ops.freeze_late = pm_genpd_freeze_late; + genpd->domain.ops.resume_early = pm_generic_resume_early; + genpd->domain.ops.resume = pm_generic_resume; + genpd->domain.ops.freeze = pm_generic_freeze; + genpd->domain.ops.freeze_late = pm_generic_freeze_late; genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq; genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq; - genpd->domain.ops.thaw_early = pm_genpd_thaw_early; - genpd->domain.ops.thaw = pm_genpd_thaw; - genpd->domain.ops.poweroff = pm_genpd_suspend; - genpd->domain.ops.poweroff_late = pm_genpd_suspend_late; + genpd->domain.ops.thaw_early = pm_generic_thaw_early; + genpd->domain.ops.thaw = pm_generic_thaw; + genpd->domain.ops.poweroff = pm_generic_poweroff; + genpd->domain.ops.poweroff_late = pm_generic_poweroff_late; genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq; genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq; - genpd->domain.ops.restore_early = pm_genpd_resume_early; - genpd->domain.ops.restore = pm_genpd_resume; + genpd->domain.ops.restore_early = pm_generic_restore_early; + genpd->domain.ops.restore = pm_generic_restore; genpd->domain.ops.complete = pm_genpd_complete; if (genpd->flags & GENPD_FLAG_PM_CLK) { @@ -1518,6 +1323,8 @@ void pm_genpd_init(struct generic_pm_domain *genpd, mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); + + return 0; } EXPORT_SYMBOL_GPL(pm_genpd_init); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index b74690418504..e097d355cc04 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1045,10 +1045,14 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) */ if (!parent->power.disable_depth && !parent->power.ignore_children - && parent->power.runtime_status != RPM_ACTIVE) + && parent->power.runtime_status != RPM_ACTIVE) { + dev_err(dev, "runtime PM trying to activate child device %s but parent (%s) is not active\n", + dev_name(dev), + dev_name(parent)); error = -EBUSY; - else if (dev->power.runtime_status == RPM_SUSPENDED) + } else if (dev->power.runtime_status == RPM_SUSPENDED) { atomic_inc(&parent->power.child_count); + } spin_unlock(&parent->power.lock); @@ -1256,7 +1260,7 @@ void pm_runtime_allow(struct device *dev) dev->power.runtime_auto = true; if (atomic_dec_and_test(&dev->power.usage_count)) - rpm_idle(dev, RPM_AUTO); + rpm_idle(dev, RPM_AUTO | RPM_ASYNC); out: spin_unlock_irq(&dev->power.lock); @@ -1506,6 
+1510,9 @@ int pm_runtime_force_resume(struct device *dev) goto out; } + if (!pm_runtime_status_suspended(dev)) + goto out; + ret = pm_runtime_set_active(dev); if (ret) goto out; diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c index 1a8ec3b2b601..4735318f4268 100644 --- a/drivers/base/regmap/regmap-i2c.c +++ b/drivers/base/regmap/regmap-i2c.c @@ -259,7 +259,7 @@ static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c, { if (i2c_check_functionality(i2c->adapter, I2C_FUNC_I2C)) return ®map_i2c; - else if (config->reg_bits == 8 && + else if (config->val_bits == 8 && config->reg_bits == 8 && i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) return ®map_i2c_smbus_i2c_block; diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 26f799e71c82..ec262476d043 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -268,13 +268,16 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) bool handled = false; u32 reg; + if (chip->handle_pre_irq) + chip->handle_pre_irq(chip->irq_drv_data); + if (chip->runtime_pm) { ret = pm_runtime_get_sync(map->dev); if (ret < 0) { dev_err(map->dev, "IRQ thread failed to resume: %d\n", ret); pm_runtime_put(map->dev); - return IRQ_NONE; + goto exit; } } @@ -296,7 +299,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) if (ret != 0) { dev_err(map->dev, "Failed to read IRQ status: %d\n", ret); - return IRQ_NONE; + goto exit; } for (i = 0; i < data->chip->num_regs; i++) { @@ -312,7 +315,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) break; default: BUG(); - return IRQ_NONE; + goto exit; } } @@ -329,7 +332,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) ret); if (chip->runtime_pm) pm_runtime_put(map->dev); - return IRQ_NONE; + goto exit; } } } @@ -365,6 +368,10 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) if (chip->runtime_pm) pm_runtime_put(map->dev); +exit: + if (chip->handle_post_irq) + chip->handle_post_irq(chip->irq_drv_data); + if (handled) return IRQ_HANDLED; else diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index df2d2ef5d6b3..51fa7d66a393 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1777,8 +1777,6 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val, size_t val_bytes = map->format.val_bytes; size_t total_size = val_bytes * val_count; - if (map->bus && !map->format.parse_inplace) - return -EINVAL; if (!IS_ALIGNED(reg, map->reg_stride)) return -EINVAL; @@ -1789,7 +1787,8 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val, * * The first if block is used for memory mapped io. It does not allow * val_bytes of 3 for example. - * The second one is used for busses which do not have this limitation + * The second one is for busses that do not provide raw I/O. + * The third one is used for busses which do not have these limitations * and can write arbitrary value lengths. 
*/ if (!map->bus) { @@ -1825,6 +1824,32 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val, } out: map->unlock(map->lock_arg); + } else if (map->bus && !map->format.parse_inplace) { + const u8 *u8 = val; + const u16 *u16 = val; + const u32 *u32 = val; + unsigned int ival; + + for (i = 0; i < val_count; i++) { + switch (map->format.val_bytes) { + case 4: + ival = u32[i]; + break; + case 2: + ival = u16[i]; + break; + case 1: + ival = u8[i]; + break; + default: + return -EINVAL; + } + + ret = regmap_write(map, reg + (i * map->reg_stride), + ival); + if (ret) + return ret; + } } else if (map->use_single_write || (map->max_raw_write && map->max_raw_write < total_size)) { int chunk_stride = map->reg_stride; diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 8b7d7f8e5851..df3c97cb4c99 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -77,6 +77,14 @@ static DEVICE_ATTR_RO(book_siblings); static DEVICE_ATTR_RO(book_siblings_list); #endif +#ifdef CONFIG_SCHED_DRAWER +define_id_show_func(drawer_id); +static DEVICE_ATTR_RO(drawer_id); +define_siblings_show_func(drawer_siblings, drawer_cpumask); +static DEVICE_ATTR_RO(drawer_siblings); +static DEVICE_ATTR_RO(drawer_siblings_list); +#endif + static struct attribute *default_attrs[] = { &dev_attr_physical_package_id.attr, &dev_attr_core_id.attr, @@ -88,6 +96,11 @@ static struct attribute *default_attrs[] = { &dev_attr_book_id.attr, &dev_attr_book_siblings.attr, &dev_attr_book_siblings_list.attr, +#endif +#ifdef CONFIG_SCHED_DRAWER + &dev_attr_drawer_id.attr, + &dev_attr_drawer_siblings.attr, + &dev_attr_drawer_siblings_list.attr, #endif NULL }; diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig index efdc2ae8441a..b5c48a8d485f 100644 --- a/drivers/bcma/Kconfig +++ b/drivers/bcma/Kconfig @@ -76,9 +76,16 @@ config BCMA_PFLASH default y config BCMA_SFLASH - bool - depends on BCMA_DRIVER_MIPS + bool "ChipCommon-attached serial flash support" + depends on BCMA_HOST_SOC default y + help + Some cheap devices have serial flash connected to the ChipCommon + instead of independent SPI controller. It requires using a separated + driver that implements ChipCommon specific interface communication. + + Enabling this symbol will let bcma recognize serial flash and register + it as platform device. config BCMA_NFLASH bool diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index eda09090cb52..f642c4264c27 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -8,8 +8,6 @@ #include #include -#define BCMA_CORE_SIZE 0x1000 - #define bcma_err(bus, fmt, ...) \ pr_err("bus%d: " fmt, (bus)->num, ##__VA_ARGS__) #define bcma_warn(bus, fmt, ...) 
\ diff --git a/drivers/bcma/driver_chipcommon_b.c b/drivers/bcma/driver_chipcommon_b.c index c20b5f4ff290..57f10b58b47c 100644 --- a/drivers/bcma/driver_chipcommon_b.c +++ b/drivers/bcma/driver_chipcommon_b.c @@ -33,11 +33,12 @@ static bool bcma_wait_reg(struct bcma_bus *bus, void __iomem *addr, u32 mask, void bcma_chipco_b_mii_write(struct bcma_drv_cc_b *ccb, u32 offset, u32 value) { struct bcma_bus *bus = ccb->core->bus; + void __iomem *mii = ccb->mii; - writel(offset, ccb->mii + 0x00); - bcma_wait_reg(bus, ccb->mii + 0x00, 0x0100, 0x0000, 100); - writel(value, ccb->mii + 0x04); - bcma_wait_reg(bus, ccb->mii + 0x00, 0x0100, 0x0000, 100); + writel(offset, mii + BCMA_CCB_MII_MNG_CTL); + bcma_wait_reg(bus, mii + BCMA_CCB_MII_MNG_CTL, 0x0100, 0x0000, 100); + writel(value, mii + BCMA_CCB_MII_MNG_CMD_DATA); + bcma_wait_reg(bus, mii + BCMA_CCB_MII_MNG_CTL, 0x0100, 0x0000, 100); } EXPORT_SYMBOL_GPL(bcma_chipco_b_mii_write); diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index cae5385cf499..bd46569e0e52 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -295,6 +295,7 @@ static const struct pci_device_id bcma_pci_bridge_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4359) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4360) }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_DELL, 0x0016) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_FOXCONN, 0xe092) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a0) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a9) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43aa) }, diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index d597e432e195..ab19adb07a12 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1750,7 +1750,7 @@ aoecmd_init(void) int ret; /* get_zeroed_page returns page with ref count 1 */ - p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + p = (void *) get_zeroed_page(GFP_KERNEL); if (!p) return -ENOMEM; empty_page = virt_to_page(p); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index c04bd9bc39fd..ba5145d384d8 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -339,7 +339,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) goto io_error; - if (unlikely(bio->bi_rw & REQ_DISCARD)) { + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { if (sector & ((PAGE_SIZE >> SECTOR_SHIFT) - 1) || bio->bi_iter.bi_size & ~PAGE_MASK) goto io_error; @@ -347,9 +347,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) goto out; } - rw = bio_rw(bio); - if (rw == READA) - rw = READ; + rw = bio_data_dir(bio); bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; @@ -509,7 +507,9 @@ static struct brd_device *brd_alloc(int i) blk_queue_max_discard_sectors(brd->brd_queue, UINT_MAX); brd->brd_queue->limits.discard_zeroes_data = 1; queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue); - +#ifdef CONFIG_BLK_DEV_RAM_DAX + queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue); +#endif disk = brd->brd_disk = alloc_disk(max_part); if (!disk) goto out_free_queue; diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 63c2064689f8..db9d6bb6352d 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1951,7 +1951,6 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, if (cciss_create_ld_sysfs_entry(h, drv_index)) goto cleanup_queue; disk->private_data = h->drv[drv_index]; - disk->driverfs_dev = 
&h->drv[drv_index]->dev; /* Set up queue information */ blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask); @@ -1973,7 +1972,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, /* allows the interrupt handler to start the queue */ wmb(); h->drv[drv_index]->queue = disk->queue; - add_disk(disk); + device_add_disk(&h->drv[drv_index]->dev, disk); return 0; cleanup_queue: diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 10459a145062..0a1aaf8c24c4 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -137,19 +137,19 @@ void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_b static int _drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, - sector_t sector, int rw) + sector_t sector, int op) { struct bio *bio; /* we do all our meta data IO in aligned 4k blocks. */ const int size = 4096; - int err; + int err, op_flags = 0; device->md_io.done = 0; device->md_io.error = -ENODEV; - if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags)) - rw |= REQ_FUA | REQ_FLUSH; - rw |= REQ_SYNC | REQ_NOIDLE; + if ((op == REQ_OP_WRITE) && !test_bit(MD_NO_FUA, &device->flags)) + op_flags |= REQ_FUA | REQ_PREFLUSH; + op_flags |= REQ_SYNC | REQ_NOIDLE; bio = bio_alloc_drbd(GFP_NOIO); bio->bi_bdev = bdev->md_bdev; @@ -159,9 +159,9 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, goto out; bio->bi_private = device; bio->bi_end_io = drbd_md_endio; - bio->bi_rw = rw; + bio_set_op_attrs(bio, op, op_flags); - if (!(rw & WRITE) && device->state.disk == D_DISKLESS && device->ldev == NULL) + if (op != REQ_OP_WRITE && device->state.disk == D_DISKLESS && device->ldev == NULL) /* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */ ; else if (!get_ldev_if_state(device, D_ATTACHING)) { @@ -174,10 +174,10 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, bio_get(bio); /* one bio_put() is in the completion handler */ atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */ device->md_io.submit_jif = jiffies; - if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) + if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_io_error(bio); else - submit_bio(rw, bio); + submit_bio(bio); wait_until_done_or_force_detached(device, bdev, &device->md_io.done); if (!bio->bi_error) err = device->md_io.error; @@ -188,7 +188,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, } int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, - sector_t sector, int rw) + sector_t sector, int op) { int err; D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1); @@ -197,19 +197,21 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n", current->comm, current->pid, __func__, - (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", + (unsigned long long)sector, (op == REQ_OP_WRITE) ? "WRITE" : "READ", (void*)_RET_IP_ ); if (sector < drbd_md_first_sector(bdev) || sector + 7 > drbd_md_last_sector(bdev)) drbd_alert(device, "%s [%d]:%s(,%llus,%s) out of range md access!\n", current->comm, current->pid, __func__, - (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); + (unsigned long long)sector, + (op == REQ_OP_WRITE) ? 
"WRITE" : "READ"); - err = _drbd_md_sync_page_io(device, bdev, sector, rw); + err = _drbd_md_sync_page_io(device, bdev, sector, op); if (err) { drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n", - (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err); + (unsigned long long)sector, + (op == REQ_OP_WRITE) ? "WRITE" : "READ", err); } return err; } @@ -256,7 +258,7 @@ bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); - D_ASSERT(device, (unsigned)(last - first) <= 1); + D_ASSERT(device, first <= last); D_ASSERT(device, atomic_read(&device->local_cnt) > 0); /* FIXME figure out a fast path for bios crossing AL extent boundaries */ @@ -339,6 +341,8 @@ static int __al_write_transaction(struct drbd_device *device, struct al_transact i = 0; + drbd_bm_reset_al_hints(device); + /* Even though no one can start to change this list * once we set the LC_LOCKED -- from drbd_al_begin_io(), * lc_try_lock_for_transaction() --, someone may still @@ -768,10 +772,18 @@ static bool lazy_bitmap_update_due(struct drbd_device *device) static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done) { - if (rs_done) - set_bit(RS_DONE, &device->flags); - /* and also set RS_PROGRESS below */ - else if (!lazy_bitmap_update_due(device)) + if (rs_done) { + struct drbd_connection *connection = first_peer_device(device)->connection; + if (connection->agreed_pro_version <= 95 || + is_sync_target_state(device->state.conn)) + set_bit(RS_DONE, &device->flags); + /* and also set RS_PROGRESS below */ + + /* Else: rather wait for explicit notification via receive_state, + * to avoid uuids-rotated-too-fast causing full resync + * in next handshake, in case the replication link breaks + * at the most unfortunate time... */ + } else if (!lazy_bitmap_update_due(device)) return; drbd_device_post_work(device, RS_PROGRESS); @@ -830,6 +842,13 @@ static int update_sync_bits(struct drbd_device *device, return count; } +static bool plausible_request_size(int size) +{ + return size > 0 + && size <= DRBD_MAX_BATCH_BIO_SIZE + && IS_ALIGNED(size, 512); +} + /* clear the bit corresponding to the piece of storage in question: * size byte of data starting from sector. Only clear a bits of the affected * one ore more _aligned_ BM_BLOCK_SIZE blocks. @@ -845,11 +864,11 @@ int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, unsigned long count = 0; sector_t esector, nr_sectors; - /* This would be an empty REQ_FLUSH, be silent. */ + /* This would be an empty REQ_PREFLUSH, be silent. */ if ((mode == SET_OUT_OF_SYNC) && size == 0) return 0; - if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { + if (!plausible_request_size(size)) { drbd_err(device, "%s: sector=%llus size=%d nonsense!\n", drbd_change_sync_fname[mode], (unsigned long long)sector, size); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 92d6fc020a65..ab62b81c2ca7 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -96,6 +96,13 @@ struct drbd_bitmap { struct page **bm_pages; spinlock_t bm_lock; + /* exclusively to be used by __al_write_transaction(), + * drbd_bm_mark_for_writeout() and + * and drbd_bm_write_hinted() -> bm_rw() called from there. 
+ */ + unsigned int n_bitmap_hints; + unsigned int al_bitmap_hints[AL_UPDATES_PER_TRANSACTION]; + /* see LIMITATIONS: above */ unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */ @@ -242,6 +249,11 @@ static void bm_set_page_need_writeout(struct page *page) set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); } +void drbd_bm_reset_al_hints(struct drbd_device *device) +{ + device->bitmap->n_bitmap_hints = 0; +} + /** * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout * @device: DRBD device. @@ -253,6 +265,7 @@ static void bm_set_page_need_writeout(struct page *page) */ void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr) { + struct drbd_bitmap *b = device->bitmap; struct page *page; if (page_nr >= device->bitmap->bm_number_of_pages) { drbd_warn(device, "BAD: page_nr: %u, number_of_pages: %u\n", @@ -260,7 +273,9 @@ void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr) return; } page = device->bitmap->bm_pages[page_nr]; - set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)); + BUG_ON(b->n_bitmap_hints >= ARRAY_SIZE(b->al_bitmap_hints)); + if (!test_and_set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page))) + b->al_bitmap_hints[b->n_bitmap_hints++] = page_nr; } static int bm_test_page_unchanged(struct page *page) @@ -427,8 +442,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) } /* - * called on driver init only. TODO call when a device is created. - * allocates the drbd_bitmap, and stores it in device->bitmap. + * allocates the drbd_bitmap and stores it in device->bitmap. */ int drbd_bm_init(struct drbd_device *device) { @@ -633,7 +647,8 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi unsigned long bits, words, owords, obits; unsigned long want, have, onpages; /* number of pages */ struct page **npages, **opages = NULL; - int err = 0, growing; + int err = 0; + bool growing; if (!expect(b)) return -ENOMEM; @@ -980,7 +995,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho struct drbd_bitmap *b = device->bitmap; struct page *page; unsigned int len; - unsigned int rw = (ctx->flags & BM_AIO_READ) ? READ : WRITE; + unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE; sector_t on_disk_sector = device->ldev->md.md_offset + device->ldev->md.bm_offset; @@ -1011,12 +1026,12 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho bio_add_page(bio, page, len, 0); bio->bi_private = ctx; bio->bi_end_io = drbd_bm_endio; + bio_set_op_attrs(bio, op, 0); - if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { - bio->bi_rw |= rw; + if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { bio_io_error(bio); } else { - submit_bio(rw, bio); + submit_bio(bio); /* this should not count as user activity and cause the * resync to throttle -- see drbd_rs_should_slow_down(). */ atomic_add(len >> 9, &device->rs_sect_ev); @@ -1030,7 +1045,7 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned { struct drbd_bm_aio_ctx *ctx; struct drbd_bitmap *b = device->bitmap; - int num_pages, i, count = 0; + unsigned int num_pages, i, count = 0; unsigned long now; char ppb[10]; int err = 0; @@ -1078,16 +1093,37 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned now = jiffies; /* let the layers below us try to merge these bios... 
*/ - for (i = 0; i < num_pages; i++) { - /* ignore completely unchanged pages */ - if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) - break; - if (!(flags & BM_AIO_READ)) { - if ((flags & BM_AIO_WRITE_HINTED) && - !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, - &page_private(b->bm_pages[i]))) - continue; + if (flags & BM_AIO_READ) { + for (i = 0; i < num_pages; i++) { + atomic_inc(&ctx->in_flight); + bm_page_io_async(ctx, i); + ++count; + cond_resched(); + } + } else if (flags & BM_AIO_WRITE_HINTED) { + /* ASSERT: BM_AIO_WRITE_ALL_PAGES is not set. */ + unsigned int hint; + for (hint = 0; hint < b->n_bitmap_hints; hint++) { + i = b->al_bitmap_hints[hint]; + if (i >= num_pages) /* == -1U: no hint here. */ + continue; + /* Several AL-extents may point to the same page. */ + if (!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, + &page_private(b->bm_pages[i]))) + continue; + /* Has it even changed? */ + if (bm_test_page_unchanged(b->bm_pages[i])) + continue; + atomic_inc(&ctx->in_flight); + bm_page_io_async(ctx, i); + ++count; + } + } else { + for (i = 0; i < num_pages; i++) { + /* ignore completely unchanged pages */ + if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) + break; if (!(flags & BM_AIO_WRITE_ALL_PAGES) && bm_test_page_unchanged(b->bm_pages[i])) { dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i); @@ -1100,11 +1136,11 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned dynamic_drbd_dbg(device, "skipped bm lazy write for idx %u\n", i); continue; } + atomic_inc(&ctx->in_flight); + bm_page_io_async(ctx, i); + ++count; + cond_resched(); } - atomic_inc(&ctx->in_flight); - bm_page_io_async(ctx, i); - ++count; - cond_resched(); } /* @@ -1121,10 +1157,14 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy); /* summary for global bitmap IO */ - if (flags == 0) - drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n", - (flags & BM_AIO_READ) ? "READ" : "WRITE", - count, jiffies - now); + if (flags == 0) { + unsigned int ms = jiffies_to_msecs(jiffies - now); + if (ms > 5) { + drbd_info(device, "bitmap %s of %u pages took %u ms\n", + (flags & BM_AIO_READ) ? 
"READ" : "WRITE", + count, ms); + } + } if (ctx->error) { drbd_alert(device, "we had at least one MD IO ERROR during bitmap IO\n"); diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index 4de95bbff486..be91a8d7c22a 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -237,14 +237,9 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C"); seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync"); - if (f & EE_IS_TRIM) { - seq_putc(m, sep); - sep = '|'; - if (f & EE_IS_TRIM_USE_ZEROOUT) - seq_puts(m, "zero-out"); - else - seq_puts(m, "trim"); - } + if (f & EE_IS_TRIM) + __seq_print_rq_state_bit(m, f & EE_IS_TRIM_USE_ZEROOUT, &sep, "zero-out", "trim"); + seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same"); seq_putc(m, '\n'); } @@ -908,7 +903,7 @@ static int drbd_version_open(struct inode *inode, struct file *file) return single_open(file, drbd_version_show, NULL); } -static struct file_operations drbd_version_fops = { +static const struct file_operations drbd_version_fops = { .owner = THIS_MODULE, .open = drbd_version_open, .llseek = seq_lseek, diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7a1cf7eaa71d..7b54354976a5 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -468,9 +468,15 @@ enum { /* this is/was a write request */ __EE_WRITE, + /* this is/was a write same request */ + __EE_WRITE_SAME, + /* this originates from application on peer * (not some resync or verify or other DRBD internal request) */ __EE_APPLICATION, + + /* If it contains only 0 bytes, send back P_RS_DEALLOCATED */ + __EE_RS_THIN_REQ, }; #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) @@ -484,7 +490,9 @@ enum { #define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE) #define EE_SUBMITTED (1<<__EE_SUBMITTED) #define EE_WRITE (1<<__EE_WRITE) +#define EE_WRITE_SAME (1<<__EE_WRITE_SAME) #define EE_APPLICATION (1<<__EE_APPLICATION) +#define EE_RS_THIN_REQ (1<<__EE_RS_THIN_REQ) /* flag bits per device */ enum { @@ -1123,6 +1131,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int extern int drbd_send_bitmap(struct drbd_device *device); extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode); extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode); +extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *); extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev); extern void drbd_device_cleanup(struct drbd_device *device); void drbd_print_uuids(struct drbd_device *device, const char *text); @@ -1327,14 +1336,14 @@ struct bm_extent { #endif #endif -/* BIO_MAX_SIZE is 256 * PAGE_SIZE, +/* Estimate max bio size as 256 * PAGE_SIZE, * so for typical PAGE_SIZE of 4k, that is (1<<20) Byte. * Since we may live in a mixed-platform cluster, * we limit us to a platform agnostic constant here for now. * A followup commit may allow even bigger BIO sizes, * once we thought that through. 
*/ #define DRBD_MAX_BIO_SIZE (1U << 20) -#if DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE +#if DRBD_MAX_BIO_SIZE > (BIO_MAX_PAGES << PAGE_SHIFT) #error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE #endif #define DRBD_MAX_BIO_SIZE_SAFE (1U << 12) /* Works always = 4k */ @@ -1342,11 +1351,11 @@ struct bm_extent { #define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */ #define DRBD_MAX_BIO_SIZE_P95 (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */ -/* For now, don't allow more than one activity log extent worth of data - * to be discarded in one go. We may need to rework drbd_al_begin_io() - * to allow for even larger discard ranges */ -#define DRBD_MAX_DISCARD_SIZE AL_EXTENT_SIZE -#define DRBD_MAX_DISCARD_SECTORS (DRBD_MAX_DISCARD_SIZE >> 9) +/* For now, don't allow more than half of what we can "activate" in one + * activity log transaction to be discarded in one go. We may need to rework + * drbd_al_begin_io() to allow for even larger discard ranges */ +#define DRBD_MAX_BATCH_BIO_SIZE (AL_UPDATES_PER_TRANSACTION/2*AL_EXTENT_SIZE) +#define DRBD_MAX_BBIO_SECTORS (DRBD_MAX_BATCH_BIO_SIZE >> 9) extern int drbd_bm_init(struct drbd_device *device); extern int drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits); @@ -1369,6 +1378,7 @@ extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr); extern int drbd_bm_read(struct drbd_device *device) __must_hold(local); extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr); extern int drbd_bm_write(struct drbd_device *device) __must_hold(local); +extern void drbd_bm_reset_al_hints(struct drbd_device *device) __must_hold(local); extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local); extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local); extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local); @@ -1483,12 +1493,14 @@ enum determine_dev_size { extern enum determine_dev_size drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local); extern void resync_after_online_grow(struct drbd_device *); -extern void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev); +extern void drbd_reconsider_queue_parameters(struct drbd_device *device, + struct drbd_backing_dev *bdev, struct o_qlim *o); extern enum drbd_state_rv drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force); extern bool conn_try_outdate_peer(struct drbd_connection *connection); extern void conn_try_outdate_peer_async(struct drbd_connection *connection); +extern enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd); extern int drbd_khelper(struct drbd_device *device, char *cmd); /* drbd_worker.c */ @@ -1507,7 +1519,7 @@ extern int drbd_resync_finished(struct drbd_device *device); extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent); extern void drbd_md_put_buffer(struct drbd_device *device); extern int drbd_md_sync_page_io(struct drbd_device *device, - struct drbd_backing_dev *bdev, sector_t sector, int rw); + struct drbd_backing_dev *bdev, sector_t sector, int op); extern void drbd_ov_out_of_sync_found(struct drbd_device *, sector_t, int); extern void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev, unsigned int *done); @@ -1548,6 +1560,8 @@ extern void start_resync_timer_fn(unsigned long data); 
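/* Editorial sketch, not part of this patch: the op parameters declared around here reflect the v4.8 block layer split of the old "rw" flag word into a REQ_OP_* operation plus separate modifier flags, set on the bio before submission. A minimal caller, with a hypothetical helper name, could look like: */ static inline void drbd_example_submit_write(struct bio *bio) { bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_FUA | REQ_PREFLUSH); /* operation and modifier flags are now distinct values */ submit_bio(bio); /* submit_bio() no longer takes a rw argument */ }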
extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req); /* drbd_receiver.c */ +extern int drbd_issue_discard_or_zero_out(struct drbd_device *device, + sector_t start, unsigned int nr_sectors, bool discard); extern int drbd_receiver(struct drbd_thread *thi); extern int drbd_ack_receiver(struct drbd_thread *thi); extern void drbd_send_ping_wf(struct work_struct *ws); @@ -1557,11 +1571,11 @@ extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector bool throttle_if_app_is_waiting); extern int drbd_submit_peer_request(struct drbd_device *, struct drbd_peer_request *, const unsigned, - const int); + const unsigned, const int); extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *); extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64, sector_t, unsigned int, - bool, + unsigned int, gfp_t) __must_hold(local); extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *, int); @@ -1635,8 +1649,6 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin /* drbd_proc.c */ extern struct proc_dir_entry *drbd_proc; extern const struct file_operations drbd_proc_fops; -extern const char *drbd_conn_str(enum drbd_conns s); -extern const char *drbd_role_str(enum drbd_role s); /* drbd_actlog.c */ extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i); @@ -2095,13 +2107,22 @@ static inline void _sub_unacked(struct drbd_device *device, int n, const char *f ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); } +static inline bool is_sync_target_state(enum drbd_conns connection_state) +{ + return connection_state == C_SYNC_TARGET || + connection_state == C_PAUSED_SYNC_T; +} + +static inline bool is_sync_source_state(enum drbd_conns connection_state) +{ + return connection_state == C_SYNC_SOURCE || + connection_state == C_PAUSED_SYNC_S; +} + static inline bool is_sync_state(enum drbd_conns connection_state) { - return - (connection_state == C_SYNC_SOURCE - || connection_state == C_SYNC_TARGET - || connection_state == C_PAUSED_SYNC_S - || connection_state == C_PAUSED_SYNC_T); + return is_sync_source_state(connection_state) || + is_sync_target_state(connection_state); } /** diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index f210543f05f4..23c5a94428d2 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -6,13 +6,13 @@ struct drbd_interval { struct rb_node rb; - sector_t sector; /* start sector of the interval */ - unsigned int size; /* size in bytes */ - sector_t end; /* highest interval end in subtree */ - int local:1 /* local or remote request? */; - int waiting:1; /* someone is waiting for this to complete */ - int completed:1; /* this has been completed already; - * ignore for conflict detection */ + sector_t sector; /* start sector of the interval */ + unsigned int size; /* size in bytes */ + sector_t end; /* highest interval end in subtree */ + unsigned int local:1 /* local or remote request? 
*/; + unsigned int waiting:1; /* someone is waiting for completion */ + unsigned int completed:1; /* this has been completed already; + * ignore for conflict detection */ }; static inline void drbd_clear_interval(struct drbd_interval *i) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2ba1494b2799..0501ae0c517b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include @@ -920,6 +920,31 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device) } } +/* communicated if (agreed_features & DRBD_FF_WSAME) */ +void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct request_queue *q) +{ + if (q) { + p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q)); + p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q)); + p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q)); + p->qlim->io_min = cpu_to_be32(queue_io_min(q)); + p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); + p->qlim->discard_enabled = blk_queue_discard(q); + p->qlim->discard_zeroes_data = queue_discard_zeroes_data(q); + p->qlim->write_same_capable = !!q->limits.max_write_same_sectors; + } else { + q = device->rq_queue; + p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q)); + p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q)); + p->qlim->alignment_offset = 0; + p->qlim->io_min = cpu_to_be32(queue_io_min(q)); + p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); + p->qlim->discard_enabled = 0; + p->qlim->discard_zeroes_data = 0; + p->qlim->write_same_capable = 0; + } +} + int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags) { struct drbd_device *device = peer_device->device; @@ -928,29 +953,37 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu sector_t d_size, u_size; int q_order_type; unsigned int max_bio_size; + unsigned int packet_size; + + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); + if (!p) + return -EIO; + packet_size = sizeof(*p); + if (peer_device->connection->agreed_features & DRBD_FF_WSAME) + packet_size += sizeof(p->qlim[0]); + + memset(p, 0, packet_size); if (get_ldev_if_state(device, D_NEGOTIATING)) { - D_ASSERT(device, device->ldev->backing_bdev); + struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); d_size = drbd_get_max_capacity(device->ldev); rcu_read_lock(); u_size = rcu_dereference(device->ldev->disk_conf)->disk_size; rcu_read_unlock(); q_order_type = drbd_queue_order_type(device); - max_bio_size = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9; + max_bio_size = queue_max_hw_sectors(q) << 9; max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE); + assign_p_sizes_qlim(device, p, q); put_ldev(device); } else { d_size = 0; u_size = 0; q_order_type = QUEUE_ORDERED_NONE; max_bio_size = DRBD_MAX_BIO_SIZE; /* ... 
multiple BIOs per peer_request */ + assign_p_sizes_qlim(device, p, NULL); } - sock = &peer_device->connection->data; - p = drbd_prepare_command(peer_device, sock); - if (!p) - return -EIO; - if (peer_device->connection->agreed_pro_version <= 94) max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET); else if (peer_device->connection->agreed_pro_version < 100) @@ -962,7 +995,8 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu p->max_bio_size = cpu_to_be32(max_bio_size); p->queue_order_type = cpu_to_be16(q_order_type); p->dds_flags = cpu_to_be16(flags); - return drbd_send_command(peer_device, sock, P_SIZES, sizeof(*p), NULL, 0); + + return drbd_send_command(peer_device, sock, P_SIZES, packet_size, NULL, 0); } /** @@ -1377,6 +1411,22 @@ int drbd_send_ack_ex(struct drbd_peer_device *peer_device, enum drbd_packet cmd, cpu_to_be64(block_id)); } +int drbd_send_rs_deallocated(struct drbd_peer_device *peer_device, + struct drbd_peer_request *peer_req) +{ + struct drbd_socket *sock; + struct p_block_desc *p; + + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); + if (!p) + return -EIO; + p->sector = cpu_to_be64(peer_req->i.sector); + p->blksize = cpu_to_be32(peer_req->i.size); + p->pad = 0; + return drbd_send_command(peer_device, sock, P_RS_DEALLOCATED, sizeof(*p), NULL, 0); +} + int drbd_send_drequest(struct drbd_peer_device *peer_device, int cmd, sector_t sector, int size, u64 block_id) { @@ -1561,6 +1611,9 @@ static int _drbd_send_bio(struct drbd_peer_device *peer_device, struct bio *bio) ? 0 : MSG_MORE); if (err) return err; + /* REQ_OP_WRITE_SAME has only one segment */ + if (bio_op(bio) == REQ_OP_WRITE_SAME) + break; } return 0; } @@ -1579,6 +1632,9 @@ static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *b bio_iter_last(bvec, iter) ? 0 : MSG_MORE); if (err) return err; + /* REQ_OP_WRITE_SAME has only one segment */ + if (bio_op(bio) == REQ_OP_WRITE_SAME) + break; } return 0; } @@ -1603,15 +1659,17 @@ static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device, return 0; } -static u32 bio_flags_to_wire(struct drbd_connection *connection, unsigned long bi_rw) +static u32 bio_flags_to_wire(struct drbd_connection *connection, + struct bio *bio) { if (connection->agreed_pro_version >= 95) - return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) | - (bi_rw & REQ_FUA ? DP_FUA : 0) | - (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) | - (bi_rw & REQ_DISCARD ? DP_DISCARD : 0); + return (bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) | + (bio->bi_rw & REQ_FUA ? DP_FUA : 0) | + (bio->bi_rw & REQ_PREFLUSH ? DP_FLUSH : 0) | + (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) | + (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0); else - return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0; + return bio->bi_rw & REQ_SYNC ? 
DP_RW_SYNC : 0; } /* Used to send write or TRIM aka REQ_DISCARD requests @@ -1622,6 +1680,8 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * struct drbd_device *device = peer_device->device; struct drbd_socket *sock; struct p_data *p; + struct p_wsame *wsame = NULL; + void *digest_out; unsigned int dp_flags = 0; int digest_size; int err; @@ -1636,7 +1696,7 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * p->sector = cpu_to_be64(req->i.sector); p->block_id = (unsigned long)req; p->seq_num = cpu_to_be32(atomic_inc_return(&device->packet_seq)); - dp_flags = bio_flags_to_wire(peer_device->connection, req->master_bio->bi_rw); + dp_flags = bio_flags_to_wire(peer_device->connection, req->master_bio); if (device->state.conn >= C_SYNC_SOURCE && device->state.conn <= C_PAUSED_SYNC_T) dp_flags |= DP_MAY_SET_IN_SYNC; @@ -1657,12 +1717,29 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0); goto out; } + if (dp_flags & DP_WSAME) { + /* this will only work if DRBD_FF_WSAME is set AND the + * handshake agreed that all nodes and backend devices are + * WRITE_SAME capable and agree on logical_block_size */ + wsame = (struct p_wsame*)p; + digest_out = wsame + 1; + wsame->size = cpu_to_be32(req->i.size); + } else + digest_out = p + 1; /* our digest is still only over the payload. * TRIM does not carry any payload. */ if (digest_size) - drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1); - err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + digest_size, NULL, req->i.size); + drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, digest_out); + if (wsame) { + err = + __send_command(peer_device->connection, device->vnr, sock, P_WSAME, + sizeof(*wsame) + digest_size, NULL, + bio_iovec(req->master_bio).bv_len); + } else + err = + __send_command(peer_device->connection, device->vnr, sock, P_DATA, + sizeof(*p) + digest_size, NULL, req->i.size); if (!err) { /* For protocol A, we have to memcpy the payload into * socket buffers, as we may complete right away @@ -3061,7 +3138,7 @@ void drbd_md_write(struct drbd_device *device, void *b) D_ASSERT(device, drbd_md_ss(device->ldev) == device->ldev->md.md_offset); sector = device->ldev->md.md_offset; - if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) { + if (drbd_md_sync_page_io(device, device->ldev, sector, REQ_OP_WRITE)) { /* this was a try anyways ... */ drbd_err(device, "meta data update failed!\n"); drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); @@ -3263,7 +3340,8 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev) * Affects the paranoia out-of-range access check in drbd_md_sync_page_io(). 
*/ bdev->md.md_size_sect = 8; - if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, READ)) { + if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, + REQ_OP_READ)) { /* NOTE: can't do normal error processing here as this is called BEFORE disk is attached */ drbd_err(device, "Error while reading metadata.\n"); @@ -3505,7 +3583,12 @@ static int w_bitmap_io(struct drbd_work *w, int unused) struct bm_io_work *work = &device->bm_io_work; int rv = -EIO; - D_ASSERT(device, atomic_read(&device->ap_bio_cnt) == 0); + if (work->flags != BM_LOCKED_CHANGE_ALLOWED) { + int cnt = atomic_read(&device->ap_bio_cnt); + if (cnt) + drbd_err(device, "FIXME: ap_bio_cnt %d, expected 0; queued for '%s'\n", + cnt, work->why); + } if (get_ldev(device)) { drbd_bm_lock(device, work->why, work->flags); @@ -3585,18 +3668,20 @@ void drbd_queue_bitmap_io(struct drbd_device *device, int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *), char *why, enum bm_flag flags) { + /* Only suspend io, if some operation is supposed to be locked out */ + const bool do_suspend_io = flags & (BM_DONT_CLEAR|BM_DONT_SET|BM_DONT_TEST); int rv; D_ASSERT(device, current != first_peer_device(device)->connection->worker.task); - if ((flags & BM_LOCKED_SET_ALLOWED) == 0) + if (do_suspend_io) drbd_suspend_io(device); drbd_bm_lock(device, why, flags); rv = io_fn(device); drbd_bm_unlock(device); - if ((flags & BM_LOCKED_SET_ALLOWED) == 0) + if (do_suspend_io) drbd_resume_io(device); return rv; @@ -3635,6 +3720,8 @@ const char *cmdname(enum drbd_packet cmd) * one PRO_VERSION */ static const char *cmdnames[] = { [P_DATA] = "Data", + [P_WSAME] = "WriteSame", + [P_TRIM] = "Trim", [P_DATA_REPLY] = "DataReply", [P_RS_DATA_REPLY] = "RSDataReply", [P_BARRIER] = "Barrier", @@ -3679,6 +3766,8 @@ const char *cmdname(enum drbd_packet cmd) [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply", [P_RETRY_WRITE] = "retry_write", [P_PROTOCOL_UPDATE] = "protocol_update", + [P_RS_THIN_REQ] = "rs_thin_req", + [P_RS_DEALLOCATED] = "rs_deallocated", /* enum drbd_packet, but not commands - obsoleted flags: * P_MAY_IGNORE diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0bac9c8246bc..f35db29cac76 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -343,7 +343,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd) (char[20]) { }, /* address family */ (char[60]) { }, /* address */ NULL }; - char mb[12]; + char mb[14]; char *argv[] = {usermode_helper, cmd, mb, NULL }; struct drbd_connection *connection = first_peer_device(device)->connection; struct sib_info sib; @@ -352,7 +352,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd) if (current == connection->worker.task) set_bit(CALLBACK_PENDING, &connection->flags); - snprintf(mb, 12, "minor-%d", device_to_minor(device)); + snprintf(mb, 14, "minor-%d", device_to_minor(device)); setup_khelper_env(connection, envp); /* The helper may take some time. @@ -387,7 +387,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd) return ret; } -static int conn_khelper(struct drbd_connection *connection, char *cmd) +enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd) { char *envp[] = { "HOME=/", "TERM=linux", @@ -442,19 +442,17 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connec } rcu_read_unlock(); - if (fp == FP_NOT_AVAIL) { - /* IO Suspending works on the whole resource. - Do it only for one device. 
*/ - vnr = 0; - peer_device = idr_get_next(&connection->peer_devices, &vnr); - drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0)); - } - return fp; } +static bool resource_is_supended(struct drbd_resource *resource) +{ + return resource->susp || resource->susp_fen || resource->susp_nod; +} + bool conn_try_outdate_peer(struct drbd_connection *connection) { + struct drbd_resource * const resource = connection->resource; unsigned int connect_cnt; union drbd_state mask = { }; union drbd_state val = { }; @@ -462,21 +460,41 @@ bool conn_try_outdate_peer(struct drbd_connection *connection) char *ex_to_string; int r; - spin_lock_irq(&connection->resource->req_lock); + spin_lock_irq(&resource->req_lock); if (connection->cstate >= C_WF_REPORT_PARAMS) { drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n"); - spin_unlock_irq(&connection->resource->req_lock); + spin_unlock_irq(&resource->req_lock); return false; } connect_cnt = connection->connect_cnt; - spin_unlock_irq(&connection->resource->req_lock); + spin_unlock_irq(&resource->req_lock); fp = highest_fencing_policy(connection); switch (fp) { case FP_NOT_AVAIL: drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n"); - goto out; + spin_lock_irq(&resource->req_lock); + if (connection->cstate < C_WF_REPORT_PARAMS) { + _conn_request_state(connection, + (union drbd_state) { { .susp_fen = 1 } }, + (union drbd_state) { { .susp_fen = 0 } }, + CS_VERBOSE | CS_HARD | CS_DC_SUSP); + /* We are no longer suspended due to the fencing policy. + * We may still be suspended due to the on-no-data-accessible policy. + * If that was OND_IO_ERROR, fail pending requests. */ + if (!resource_is_supended(resource)) + _tl_restart(connection, CONNECTION_LOST_WHILE_PENDING); + } + /* Else: in case we raced with a connection handshake, + * let the handshake figure out if we maybe can RESEND, + * and do not resume/fail pending requests here. + * Worst case is we stay suspended for now, which may be + * resolved by either re-establishing the replication link, or + * the next link failure, or eventually the administrator. */ + spin_unlock_irq(&resource->req_lock); + return false; + case FP_DONT_CARE: return true; default: ; @@ -485,17 +503,17 @@ bool conn_try_outdate_peer(struct drbd_connection *connection) r = conn_khelper(connection, "fence-peer"); switch ((r>>8) & 0xff) { - case 3: /* peer is inconsistent */ + case P_INCONSISTENT: /* peer is inconsistent */ ex_to_string = "peer is inconsistent or worse"; mask.pdsk = D_MASK; val.pdsk = D_INCONSISTENT; break; - case 4: /* peer got outdated, or was already outdated */ + case P_OUTDATED: /* peer got outdated, or was already outdated */ ex_to_string = "peer was fenced"; mask.pdsk = D_MASK; val.pdsk = D_OUTDATED; break; - case 5: /* peer was down */ + case P_DOWN: /* peer was down */ if (conn_highest_disk(connection) == D_UP_TO_DATE) { /* we will(have) create(d) a new UUID anyways... */ ex_to_string = "peer is unreachable, assumed to be dead"; @@ -505,7 +523,7 @@ bool conn_try_outdate_peer(struct drbd_connection *connection) ex_to_string = "peer unreachable, doing nothing since disk != UpToDate"; } break; - case 6: /* Peer is primary, voluntarily outdate myself. + case P_PRIMARY: /* Peer is primary, voluntarily outdate myself. * This is useful when an unconnected R_SECONDARY is asked to * become R_PRIMARY, but finds the other peer being active. 
*/ ex_to_string = "peer is active"; @@ -513,7 +531,9 @@ bool conn_try_outdate_peer(struct drbd_connection *connection) mask.disk = D_MASK; val.disk = D_OUTDATED; break; - case 7: + case P_FENCING: + /* THINK: do we need to handle this + * like case 4, or more like case 5? */ if (fp != FP_STONITH) drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n"); ex_to_string = "peer was stonithed"; @@ -529,13 +549,11 @@ bool conn_try_outdate_peer(struct drbd_connection *connection) drbd_info(connection, "fence-peer helper returned %d (%s)\n", (r>>8) & 0xff, ex_to_string); - out: - /* Not using conn_request_state(connection, mask, val, CS_VERBOSE); here, because we might were able to re-establish the connection in the meantime. */ - spin_lock_irq(&connection->resource->req_lock); + spin_lock_irq(&resource->req_lock); if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) { if (connection->connect_cnt != connect_cnt) /* In case the connection was established and droped @@ -544,7 +562,7 @@ bool conn_try_outdate_peer(struct drbd_connection *connection) else _conn_request_state(connection, mask, val, CS_VERBOSE); } - spin_unlock_irq(&connection->resource->req_lock); + spin_unlock_irq(&resource->req_lock); return conn_highest_pdsk(connection) <= D_OUTDATED; } @@ -1154,51 +1172,160 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) return 0; } +static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity) +{ + q->limits.discard_granularity = granularity; +} + +static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) +{ + /* when we introduced REQ_WRITE_SAME support, we also bumped + * our maximum supported batch bio size used for discards. */ + if (connection->agreed_features & DRBD_FF_WSAME) + return DRBD_MAX_BBIO_SECTORS; + /* before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. */ + return AL_EXTENT_SIZE >> 9; +} + +static void decide_on_discard_support(struct drbd_device *device, + struct request_queue *q, + struct request_queue *b, + bool discard_zeroes_if_aligned) +{ + /* q = drbd device queue (device->rq_queue) + * b = backing device queue (device->ldev->backing_bdev->bd_disk->queue), + * or NULL if diskless + */ + struct drbd_connection *connection = first_peer_device(device)->connection; + bool can_do = b ? blk_queue_discard(b) : true; + + if (can_do && b && !b->limits.discard_zeroes_data && !discard_zeroes_if_aligned) { + can_do = false; + drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n"); + } + if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) { + can_do = false; + drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n"); + } + if (can_do) { + /* We don't care for the granularity, really. + * Stacking limits below should fix it for the local + * device. Whether or not it is a suitable granularity + * on the remote device is not our problem, really. If + * you care, you need to use devices with similar + * topology on all peers. 
*/ + blk_queue_discard_granularity(q, 512); + q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + } else { + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_discard_granularity(q, 0); + q->limits.max_discard_sectors = 0; + } +} + +static void fixup_discard_if_not_supported(struct request_queue *q) +{ + /* To avoid confusion, if this queue does not support discard, clear + * max_discard_sectors, which is what lsblk -D reports to the user. + * Older kernels got this wrong in "stack limits". + * */ + if (!blk_queue_discard(q)) { + blk_queue_max_discard_sectors(q, 0); + blk_queue_discard_granularity(q, 0); + } +} + +static void decide_on_write_same_support(struct drbd_device *device, + struct request_queue *q, + struct request_queue *b, struct o_qlim *o) +{ + struct drbd_peer_device *peer_device = first_peer_device(device); + struct drbd_connection *connection = peer_device->connection; + bool can_do = b ? b->limits.max_write_same_sectors : true; + + if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) { + can_do = false; + drbd_info(peer_device, "peer does not support WRITE_SAME\n"); + } + + if (o) { + /* logical block size; queue_logical_block_size(NULL) is 512 */ + unsigned int peer_lbs = be32_to_cpu(o->logical_block_size); + unsigned int me_lbs_b = queue_logical_block_size(b); + unsigned int me_lbs = queue_logical_block_size(q); + + if (me_lbs_b != me_lbs) { + drbd_warn(device, + "logical block size of local backend does not match (drbd:%u, backend:%u); was this a late attach?\n", + me_lbs, me_lbs_b); + /* rather disable write same than trigger some BUG_ON later in the scsi layer. */ + can_do = false; + } + if (me_lbs_b != peer_lbs) { + drbd_warn(peer_device, "logical block sizes do not match (me:%u, peer:%u); this may cause problems.\n", + me_lbs, peer_lbs); + if (can_do) { + drbd_dbg(peer_device, "logical block size mismatch: WRITE_SAME disabled.\n"); + can_do = false; + } + me_lbs = max(me_lbs, me_lbs_b); + /* We cannot change the logical block size of an in-use queue. + * We can only hope that access happens to be properly aligned. + * If not, the peer will likely produce an IO error, and detach. */ + if (peer_lbs > me_lbs) { + if (device->state.role != R_PRIMARY) { + blk_queue_logical_block_size(q, peer_lbs); + drbd_warn(peer_device, "logical block size set to %u\n", peer_lbs); + } else { + drbd_warn(peer_device, + "current Primary must NOT adjust logical block size (%u -> %u); hope for the best.\n", + me_lbs, peer_lbs); + } + } + } + if (can_do && !o->write_same_capable) { + /* If we introduce an open-coded write-same loop on the receiving side, + * the peer would present itself as "capable". */ + drbd_dbg(peer_device, "WRITE_SAME disabled (peer device not capable)\n"); + can_do = false; + } + } + + blk_queue_max_write_same_sectors(q, can_do ? 
DRBD_MAX_BBIO_SECTORS : 0); +} + static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, - unsigned int max_bio_size) + unsigned int max_bio_size, struct o_qlim *o) { struct request_queue * const q = device->rq_queue; unsigned int max_hw_sectors = max_bio_size >> 9; unsigned int max_segments = 0; struct request_queue *b = NULL; + struct disk_conf *dc; + bool discard_zeroes_if_aligned = true; if (bdev) { b = bdev->backing_bdev->bd_disk->queue; max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); rcu_read_lock(); - max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs; + dc = rcu_dereference(device->ldev->disk_conf); + max_segments = dc->max_bio_bvecs; + discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned; rcu_read_unlock(); blk_set_stacking_limits(&q->limits); - blk_queue_max_write_same_sectors(q, 0); } - blk_queue_logical_block_size(q, 512); blk_queue_max_hw_sectors(q, max_hw_sectors); /* This is the workaround for "bio would need to, but cannot, be split" */ blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); blk_queue_segment_boundary(q, PAGE_SIZE-1); + decide_on_discard_support(device, q, b, discard_zeroes_if_aligned); + decide_on_write_same_support(device, q, b, o); if (b) { - struct drbd_connection *connection = first_peer_device(device)->connection; - - blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS); - - if (blk_queue_discard(b) && - (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) { - /* We don't care, stacking below should fix it for the local device. - * Whether or not it is a suitable granularity on the remote device - * is not our problem, really. If you care, you need to - * use devices with similar topology on all peers. */ - q->limits.discard_granularity = 512; - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); - } else { - blk_queue_max_discard_sectors(q, 0); - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); - q->limits.discard_granularity = 0; - } - blk_queue_stack_limits(q, b); if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { @@ -1208,15 +1335,10 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; } } - /* To avoid confusion, if this queue does not support discard, clear - * max_discard_sectors, which is what lsblk -D reports to the user. 
*/ - if (!blk_queue_discard(q)) { - blk_queue_max_discard_sectors(q, 0); - q->limits.discard_granularity = 0; - } + fixup_discard_if_not_supported(q); } -void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev) +void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) { unsigned int now, new, local, peer; @@ -1259,7 +1381,7 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backin if (new != now) drbd_info(device, "max BIO size = %u\n", new); - drbd_setup_queue_param(device, bdev, new); + drbd_setup_queue_param(device, bdev, new, o); } /* Starts the worker thread */ @@ -1348,6 +1470,43 @@ static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b) a->disk_drain != b->disk_drain; } +static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf, + struct drbd_backing_dev *nbc) +{ + struct request_queue * const q = nbc->backing_bdev->bd_disk->queue; + + if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) + disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; + if (disk_conf->al_extents > drbd_al_extents_max(nbc)) + disk_conf->al_extents = drbd_al_extents_max(nbc); + + if (!blk_queue_discard(q) + || (!q->limits.discard_zeroes_data && !disk_conf->discard_zeroes_if_aligned)) { + if (disk_conf->rs_discard_granularity) { + disk_conf->rs_discard_granularity = 0; /* disable feature */ + drbd_info(device, "rs_discard_granularity feature disabled\n"); + } + } + + if (disk_conf->rs_discard_granularity) { + int orig_value = disk_conf->rs_discard_granularity; + int remainder; + + if (q->limits.discard_granularity > disk_conf->rs_discard_granularity) + disk_conf->rs_discard_granularity = q->limits.discard_granularity; + + remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity; + disk_conf->rs_discard_granularity += remainder; + + if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9) + disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9; + + if (disk_conf->rs_discard_granularity != orig_value) + drbd_info(device, "rs_discard_granularity changed to %d\n", + disk_conf->rs_discard_granularity); + } +} + int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { struct drbd_config_context adm_ctx; @@ -1395,10 +1554,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (!expect(new_disk_conf->resync_rate >= 1)) new_disk_conf->resync_rate = 1; - if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) - new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; - if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev)) - new_disk_conf->al_extents = drbd_al_extents_max(device->ldev); + sanitize_disk_conf(device, new_disk_conf, device->ldev); if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX) new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX; @@ -1457,6 +1613,9 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (write_ordering_changed(old_disk_conf, new_disk_conf)) drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH); + if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned) + drbd_reconsider_queue_parameters(device, device->ldev, NULL); + drbd_md_sync(device); if (device->state.conn >= C_CONNECTED) { @@ -1693,10 +1852,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail; - if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) - 
new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; - if (new_disk_conf->al_extents > drbd_al_extents_max(nbc)) - new_disk_conf->al_extents = drbd_al_extents_max(nbc); + sanitize_disk_conf(device, new_disk_conf, nbc); if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) { drbd_err(device, "max capacity %llu smaller than disk size %llu\n", @@ -1838,7 +1994,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) device->read_cnt = 0; device->writ_cnt = 0; - drbd_reconsider_max_bio_size(device, device->ldev); + drbd_reconsider_queue_parameters(device, device->ldev, NULL); /* If I am currently not R_PRIMARY, * but meta data primary indicator is set, diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 6537b25db9c1..be2b93fd2c11 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -25,7 +25,7 @@ #include -#include +#include #include #include #include @@ -122,18 +122,18 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se x = res/50; y = 20-x; - seq_printf(seq, "\t["); + seq_puts(seq, "\t["); for (i = 1; i < x; i++) - seq_printf(seq, "="); - seq_printf(seq, ">"); + seq_putc(seq, '='); + seq_putc(seq, '>'); for (i = 0; i < y; i++) seq_printf(seq, "."); - seq_printf(seq, "] "); + seq_puts(seq, "] "); if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T) - seq_printf(seq, "verified:"); + seq_puts(seq, "verified:"); else - seq_printf(seq, "sync'ed:"); + seq_puts(seq, "sync'ed:"); seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); /* if more than a few GB, display in MB */ @@ -146,7 +146,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se (unsigned long) Bit2KB(rs_left), (unsigned long) Bit2KB(rs_total)); - seq_printf(seq, "\n\t"); + seq_puts(seq, "\n\t"); /* see drivers/md/md.c * We do not want to overflow, so the order of operands and @@ -175,9 +175,9 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se rt / 3600, (rt % 3600) / 60, rt % 60); dbdt = Bit2KB(db/dt); - seq_printf(seq, " speed: "); + seq_puts(seq, " speed: "); seq_printf_with_thousands_grouping(seq, dbdt); - seq_printf(seq, " ("); + seq_puts(seq, " ("); /* ------------------------- ~3s average ------------------------ */ if (proc_details >= 1) { /* this is what drbd_rs_should_slow_down() uses */ @@ -188,7 +188,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se db = device->rs_mark_left[i] - rs_left; dbdt = Bit2KB(db/dt); seq_printf_with_thousands_grouping(seq, dbdt); - seq_printf(seq, " -- "); + seq_puts(seq, " -- "); } /* --------------------- long term average ---------------------- */ @@ -200,11 +200,11 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se db = rs_total - rs_left; dbdt = Bit2KB(db/dt); seq_printf_with_thousands_grouping(seq, dbdt); - seq_printf(seq, ")"); + seq_putc(seq, ')'); if (state.conn == C_SYNC_TARGET || state.conn == C_VERIFY_S) { - seq_printf(seq, " want: "); + seq_puts(seq, " want: "); seq_printf_with_thousands_grouping(seq, device->c_sync_rate); } seq_printf(seq, " K/sec%s\n", stalled ? 
" (stalled)" : ""); @@ -231,7 +231,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se (unsigned long long)bm_bits * BM_SECT_PER_BIT); if (stop_sector != 0 && stop_sector != ULLONG_MAX) seq_printf(seq, " stop sector: %llu", stop_sector); - seq_printf(seq, "\n"); + seq_putc(seq, '\n'); } } @@ -276,7 +276,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) rcu_read_lock(); idr_for_each_entry(&drbd_devices, device, i) { if (prev_i != i - 1) - seq_printf(seq, "\n"); + seq_putc(seq, '\n'); prev_i = i; state = device->state; diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h index ef9245363dcc..4d296800f706 100644 --- a/drivers/block/drbd/drbd_protocol.h +++ b/drivers/block/drbd/drbd_protocol.h @@ -60,6 +60,15 @@ enum drbd_packet { * which is why I chose TRIM here, to disambiguate. */ P_TRIM = 0x31, + /* Only use these two if both support FF_THIN_RESYNC */ + P_RS_THIN_REQ = 0x32, /* Request a block for resync or reply P_RS_DEALLOCATED */ + P_RS_DEALLOCATED = 0x33, /* Contains only zeros on sync source node */ + + /* REQ_WRITE_SAME. + * On a receiving side without REQ_WRITE_SAME, + * we may fall back to an opencoded loop instead. */ + P_WSAME = 0x34, + P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ P_MAX_OPT_CMD = 0x101, @@ -106,16 +115,20 @@ struct p_header100 { u32 pad; } __packed; -/* these defines must not be changed without changing the protocol version */ -#define DP_HARDBARRIER 1 /* depricated */ +/* These defines must not be changed without changing the protocol version. + * New defines may only be introduced together with protocol version bump or + * new protocol feature flags. + */ +#define DP_HARDBARRIER 1 /* no longer used */ #define DP_RW_SYNC 2 /* equals REQ_SYNC */ #define DP_MAY_SET_IN_SYNC 4 #define DP_UNPLUG 8 /* not used anymore */ #define DP_FUA 16 /* equals REQ_FUA */ -#define DP_FLUSH 32 /* equals REQ_FLUSH */ +#define DP_FLUSH 32 /* equals REQ_PREFLUSH */ #define DP_DISCARD 64 /* equals REQ_DISCARD */ #define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */ #define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */ +#define DP_WSAME 512 /* equiv. REQ_WRITE_SAME */ struct p_data { u64 sector; /* 64 bits sector number */ @@ -129,6 +142,11 @@ struct p_trim { u32 size; /* == bio->bi_size */ } __packed; +struct p_wsame { + struct p_data p_data; + u32 size; /* == bio->bi_size */ +} __packed; + /* * commands which share a struct: * p_block_ack: @@ -160,7 +178,23 @@ struct p_block_req { * ReportParams */ -#define FF_TRIM 1 +/* supports TRIM/DISCARD on the "wire" protocol */ +#define DRBD_FF_TRIM 1 + +/* Detect all-zeros during resync, and rather TRIM/UNMAP/DISCARD those blocks + * instead of fully allocate a supposedly thin volume on initial resync */ +#define DRBD_FF_THIN_RESYNC 2 + +/* supports REQ_WRITE_SAME on the "wire" protocol. + * Note: this flag is overloaded, + * its presence also + * - indicates support for 128 MiB "batch bios", + * max discard size of 128 MiB + * instead of 4M before that. 
+ * - indicates that we exchange additional settings in p_sizes + * drbd_send_sizes()/receive_sizes() + */ +#define DRBD_FF_WSAME 4 struct p_connection_features { u32 protocol_min; @@ -235,6 +269,40 @@ struct p_rs_uuid { u64 uuid; } __packed; +/* optional queue_limits if (agreed_features & DRBD_FF_WSAME) + * see also struct queue_limits, as of late 2015 */ +struct o_qlim { + /* we don't need it yet, but we may as well communicate it now */ + u32 physical_block_size; + + /* so the original in struct queue_limits is unsigned short, + * but I'd have to put in padding anyways. */ + u32 logical_block_size; + + /* One incoming bio becomes one DRBD request, + * which may be translated to several bio on the receiving side. + * We don't need to communicate chunk/boundary/segment ... limits. + */ + + /* various IO hints may be useful with "diskless client" setups */ + u32 alignment_offset; + u32 io_min; + u32 io_opt; + + /* We may need to communicate integrity stuff at some point, + * but let's not get ahead of ourselves. */ + + /* Backend discard capabilities. + * Receiving side uses "blkdev_issue_discard()", no need to communicate + * more specifics. If the backend cannot do discards, the DRBD peer + * may fall back to blkdev_issue_zeroout(). + */ + u8 discard_enabled; + u8 discard_zeroes_data; + u8 write_same_capable; + u8 _pad; +} __packed; + struct p_sizes { u64 d_size; /* size of disk */ u64 u_size; /* user requested size */ @@ -242,6 +310,9 @@ struct p_sizes { u32 max_bio_size; /* Maximal size of a BIO */ u16 queue_order_type; /* not yet implemented in DRBD*/ u16 dds_flags; /* use enum dds_flags here. */ + + /* optional queue_limits if (agreed_features & DRBD_FF_WSAME) */ + struct o_qlim qlim[0]; } __packed; struct p_state { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 050aaa1c0350..df45713dfbe8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -25,7 +25,7 @@ #include -#include +#include #include #include @@ -48,7 +48,7 @@ #include "drbd_req.h" #include "drbd_vli.h" -#define PRO_FEATURES (FF_TRIM) +#define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME) struct packet_info { enum drbd_packet cmd; @@ -361,14 +361,17 @@ You must not have the req_lock: drbd_wait_ee_list_empty() */ +/* normal: payload_size == request size (bi_size) + * w_same: payload_size == logical_block_size + * trim: payload_size == 0 */ struct drbd_peer_request * drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector, - unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local) + unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local) { struct drbd_device *device = peer_device->device; struct drbd_peer_request *peer_req; struct page *page = NULL; - unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; + unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT; if (drbd_insert_fault(device, DRBD_FAULT_AL_EE)) return NULL; @@ -380,7 +383,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto return NULL; } - if (has_payload && data_size) { + if (nr_pages) { page = drbd_alloc_pages(peer_device, nr_pages, gfpflags_allow_blocking(gfp_mask)); if (!page) @@ -390,7 +393,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto memset(peer_req, 0, sizeof(*peer_req)); INIT_LIST_HEAD(&peer_req->w.list); drbd_clear_interval(&peer_req->i); - peer_req->i.size = data_size; + peer_req->i.size 
= request_size; peer_req->i.sector = sector; peer_req->submit_jif = jiffies; peer_req->peer_device = peer_device; @@ -1204,13 +1207,84 @@ static int drbd_recv_header(struct drbd_connection *connection, struct packet_in return err; } -static void drbd_flush(struct drbd_connection *connection) +/* This is blkdev_issue_flush, but asynchronous. + * We want to submit to all component volumes in parallel, + * then wait for all completions. + */ +struct issue_flush_context { + atomic_t pending; + int error; + struct completion done; +}; +struct one_flush_context { + struct drbd_device *device; + struct issue_flush_context *ctx; +}; + +void one_flush_endio(struct bio *bio) { - int rv; - struct drbd_peer_device *peer_device; - int vnr; + struct one_flush_context *octx = bio->bi_private; + struct drbd_device *device = octx->device; + struct issue_flush_context *ctx = octx->ctx; + + if (bio->bi_error) { + ctx->error = bio->bi_error; + drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error); + } + kfree(octx); + bio_put(bio); + + clear_bit(FLUSH_PENDING, &device->flags); + put_ldev(device); + kref_put(&device->kref, drbd_destroy_device); + + if (atomic_dec_and_test(&ctx->pending)) + complete(&ctx->done); +} + +static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx) +{ + struct bio *bio = bio_alloc(GFP_NOIO, 0); + struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO); + if (!bio || !octx) { + drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n"); + /* FIXME: what else can I do now? disconnecting or detaching + * really does not help to improve the state of the world, either. + */ + kfree(octx); + if (bio) + bio_put(bio); + + ctx->error = -ENOMEM; + put_ldev(device); + kref_put(&device->kref, drbd_destroy_device); + return; + } + octx->device = device; + octx->ctx = ctx; + bio->bi_bdev = device->ldev->backing_bdev; + bio->bi_private = octx; + bio->bi_end_io = one_flush_endio; + bio_set_op_attrs(bio, REQ_OP_FLUSH, WRITE_FLUSH); + + device->flush_jif = jiffies; + set_bit(FLUSH_PENDING, &device->flags); + atomic_inc(&ctx->pending); + submit_bio(bio); +} + +static void drbd_flush(struct drbd_connection *connection) +{ if (connection->resource->write_ordering >= WO_BDEV_FLUSH) { + struct drbd_peer_device *peer_device; + struct issue_flush_context ctx; + int vnr; + + atomic_set(&ctx.pending, 1); + ctx.error = 0; + init_completion(&ctx.done); + rcu_read_lock(); idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { struct drbd_device *device = peer_device->device; @@ -1220,31 +1294,24 @@ static void drbd_flush(struct drbd_connection *connection) kref_get(&device->kref); rcu_read_unlock(); - /* Right now, we have only this one synchronous code path - * for flushes between request epochs. - * We may want to make those asynchronous, - * or at least parallelize the flushes to the volume devices. - */ - device->flush_jif = jiffies; - set_bit(FLUSH_PENDING, &device->flags); - rv = blkdev_issue_flush(device->ldev->backing_bdev, - GFP_NOIO, NULL); - clear_bit(FLUSH_PENDING, &device->flags); - if (rv) { - drbd_info(device, "local disk flush failed with status %d\n", rv); - /* would rather check on EOPNOTSUPP, but that is not reliable. 
- * don't try again for ANY return value != 0 - * if (rv == -EOPNOTSUPP) */ - drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO); - put_ldev(device); - kref_put(&device->kref, drbd_destroy_device); + submit_one_flush(device, &ctx); rcu_read_lock(); - if (rv) - break; } rcu_read_unlock(); + + /* Do we want to add a timeout, + * if disk-timeout is set? */ + if (!atomic_dec_and_test(&ctx.pending)) + wait_for_completion(&ctx.done); + + if (ctx.error) { + /* would rather check on EOPNOTSUPP, but that is not reliable. + * don't try again for ANY return value != 0 + * if (rv == -EOPNOTSUPP) */ + /* Any error is already reported by bio_endio callback. */ + drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO); + } } } @@ -1379,6 +1446,120 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); } +/* + * We *may* ignore the discard-zeroes-data setting, if so configured. + * + * Assumption is that "discard_zeroes_data=0" is set only because the backend + * may ignore partial unaligned discards. + * + * LVM/DM thin as of at least + * LVM version: 2.02.115(2)-RHEL7 (2015-01-28) + * Library version: 1.02.93-RHEL7 (2015-01-28) + * Driver version: 4.29.0 + * still behaves this way. + * + * For unaligned (wrt. alignment and granularity) or too small discards, + * we zero-out the initial (and/or) trailing unaligned partial chunks, + * but discard all the aligned full chunks. + * + * At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1". + */ +int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard) +{ + struct block_device *bdev = device->ldev->backing_bdev; + struct request_queue *q = bdev_get_queue(bdev); + sector_t tmp, nr; + unsigned int max_discard_sectors, granularity; + int alignment; + int err = 0; + + if (!discard) + goto zero_out; + + /* Zero-sector (unknown) and one-sector granularities are the same. 
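+ * (q->limits.discard_granularity is in bytes; the >> 9 below converts it to 512-byte sectors before use.)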
*/ + granularity = max(q->limits.discard_granularity >> 9, 1U); + alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; + + max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22)); + max_discard_sectors -= max_discard_sectors % granularity; + if (unlikely(!max_discard_sectors)) + goto zero_out; + + if (nr_sectors < granularity) + goto zero_out; + + tmp = start; + if (sector_div(tmp, granularity) != alignment) { + if (nr_sectors < 2*granularity) + goto zero_out; + /* start + gran - (start + gran - align) % gran */ + tmp = start + granularity - alignment; + tmp = start + granularity - sector_div(tmp, granularity); + + nr = tmp - start; + err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0); + nr_sectors -= nr; + start = tmp; + } + while (nr_sectors >= granularity) { + nr = min_t(sector_t, nr_sectors, max_discard_sectors); + err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0); + nr_sectors -= nr; + start += nr; + } + zero_out: + if (nr_sectors) { + err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0); + } + return err != 0; +} + +static bool can_do_reliable_discards(struct drbd_device *device) +{ + struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); + struct disk_conf *dc; + bool can_do; + + if (!blk_queue_discard(q)) + return false; + + if (q->limits.discard_zeroes_data) + return true; + + rcu_read_lock(); + dc = rcu_dereference(device->ldev->disk_conf); + can_do = dc->discard_zeroes_if_aligned; + rcu_read_unlock(); + return can_do; +} + +static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req) +{ + /* If the backend cannot discard, or does not guarantee + * read-back zeroes in discarded ranges, we fall back to + * zero-out. Unless configuration specifically requested + * otherwise. */ + if (!can_do_reliable_discards(device)) + peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; + + if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector, + peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT))) + peer_req->flags |= EE_WAS_ERROR; + drbd_endio_write_sec_final(peer_req); +} + +static void drbd_issue_peer_wsame(struct drbd_device *device, + struct drbd_peer_request *peer_req) +{ + struct block_device *bdev = device->ldev->backing_bdev; + sector_t s = peer_req->i.sector; + sector_t nr = peer_req->i.size >> 9; + if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages)) + peer_req->flags |= EE_WAS_ERROR; + drbd_endio_write_sec_final(peer_req); +} + + /** * drbd_submit_peer_request() * @device: DRBD device. @@ -1398,7 +1579,8 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin /* TODO allocate from our own bio_set. */ int drbd_submit_peer_request(struct drbd_device *device, struct drbd_peer_request *peer_req, - const unsigned rw, const int fault_type) + const unsigned op, const unsigned op_flags, + const int fault_type) { struct bio *bios = NULL; struct bio *bio; @@ -1409,7 +1591,13 @@ int drbd_submit_peer_request(struct drbd_device *device, unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; int err = -ENOMEM; - if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) { + /* TRIM/DISCARD: for now, always use the helper function + * blkdev_issue_zeroout(..., discard=true). + * It's synchronous, but it does the right thing wrt. bio splitting. + * Correctness first, performance later. Next step is to code an + * asynchronous variant of the same. 
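To make the head/middle/tail split of drbd_issue_discard_or_zero_out() above concrete, here is a trace with hypothetical queue limits: granularity 8 sectors, alignment 0, max_discard_sectors capped to 16, for a request with start = 5 and nr_sectors = 25.

/* head: 5 % 8 == 5 != alignment, and 25 >= 2 * granularity, so the
 *       aligned start is 5 + 8 - ((5 + 8 - 0) % 8) == 8:
 *       blkdev_issue_zeroout(bdev,  5,  3, ...)   zero    [ 5,  8)
 * mid:  nr = min(22, 16) == 16:
 *       blkdev_issue_discard(bdev,  8, 16, ...)   discard [ 8, 24)
 * tail: 6 sectors remain, 6 < granularity, the loop exits:
 *       blkdev_issue_zeroout(bdev, 24,  6, ...)   zero    [24, 30)
 */

Because max_discard_sectors is trimmed to a multiple of the granularity beforehand, each discard the loop issues starts on a chunk boundary, which is what makes the net effect look like discard_zeroes_data=1 on backends such as LVM/DM thin.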
+ */ + if (peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) { /* wait for all pending IO completions, before we start * zeroing things out. */ conn_wait_active_ee_empty(peer_req->peer_device->connection); @@ -1417,22 +1605,22 @@ int drbd_submit_peer_request(struct drbd_device *device, * so we can find it to present it in debugfs */ peer_req->submit_jif = jiffies; peer_req->flags |= EE_SUBMITTED; - spin_lock_irq(&device->resource->req_lock); - list_add_tail(&peer_req->w.list, &device->active_ee); - spin_unlock_irq(&device->resource->req_lock); - if (blkdev_issue_zeroout(device->ldev->backing_bdev, - sector, data_size >> 9, GFP_NOIO, false)) - peer_req->flags |= EE_WAS_ERROR; - drbd_endio_write_sec_final(peer_req); + + /* If this was a resync request from receive_rs_deallocated(), + * it is already on the sync_ee list */ + if (list_empty(&peer_req->w.list)) { + spin_lock_irq(&device->resource->req_lock); + list_add_tail(&peer_req->w.list, &device->active_ee); + spin_unlock_irq(&device->resource->req_lock); + } + + if (peer_req->flags & EE_IS_TRIM) + drbd_issue_peer_discard(device, peer_req); + else /* EE_WRITE_SAME */ + drbd_issue_peer_wsame(device, peer_req); return 0; } - /* Discards don't have any payload. - * But the scsi layer still expects a bio_vec it can use internally, - * see sd_setup_discard_cmnd() and blk_add_request_payload(). */ - if (peer_req->flags & EE_IS_TRIM) - nr_pages = 1; - /* In most cases, we will only need one bio. But in case the lower * level restrictions happen to be different at this offset on this * side than those of the sending peer, we may need to submit the @@ -1450,7 +1638,7 @@ next_bio: /* > peer_req->i.sector, unless this is the first bio */ bio->bi_iter.bi_sector = sector; bio->bi_bdev = device->ldev->backing_bdev; - bio->bi_rw = rw; + bio_set_op_attrs(bio, op, op_flags); bio->bi_private = peer_req; bio->bi_end_io = drbd_peer_request_endio; @@ -1458,11 +1646,6 @@ next_bio: bios = bio; ++n_bios; - if (rw & REQ_DISCARD) { - bio->bi_iter.bi_size = data_size; - goto submit; - } - page_chain_for_each(page) { unsigned len = min_t(unsigned, data_size, PAGE_SIZE); if (!bio_add_page(bio, page, len, 0)) { @@ -1484,7 +1667,6 @@ next_bio: --nr_pages; } D_ASSERT(device, data_size == 0); -submit: D_ASSERT(device, page == NULL); atomic_set(&peer_req->pending_bios, n_bios); @@ -1608,8 +1790,26 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf return 0; } +/* quick wrapper in case payload size != request_size (write same) */ +static void drbd_csum_ee_size(struct crypto_ahash *h, + struct drbd_peer_request *r, void *d, + unsigned int payload_size) +{ + unsigned int tmp = r->i.size; + r->i.size = payload_size; + drbd_csum_ee(h, r, d); + r->i.size = tmp; +} + /* used from receive_RSDataReply (recv_resync_read) - * and from receive_Data */ + * and from receive_Data. + * data_size: actual payload ("data in") + * for normal writes that is bi_size. + * for discards, that is zero. + * for write same, it is logical_block_size. + * both trim and write same have the bi_size ("data len to be affected") + * as extra argument in the packet header. + */ static struct drbd_peer_request * read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, struct packet_info *pi) __must_hold(local) @@ -1624,6 +1824,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, void *dig_vv = peer_device->connection->int_dig_vv; unsigned long *data; struct p_trim *trim = (pi->cmd == P_TRIM) ? 
pi->data : NULL; + struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL; digest_size = 0; if (!trim && peer_device->connection->peer_integrity_tfm) { @@ -1638,38 +1839,60 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, data_size -= digest_size; } + /* assume request_size == data_size, but special case trim and wsame. */ + ds = data_size; if (trim) { - D_ASSERT(peer_device, data_size == 0); - data_size = be32_to_cpu(trim->size); + if (!expect(data_size == 0)) + return NULL; + ds = be32_to_cpu(trim->size); + } else if (wsame) { + if (data_size != queue_logical_block_size(device->rq_queue)) { + drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n", + data_size, queue_logical_block_size(device->rq_queue)); + return NULL; + } + if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) { + drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n", + data_size, bdev_logical_block_size(device->ldev->backing_bdev)); + return NULL; + } + ds = be32_to_cpu(wsame->size); } - if (!expect(IS_ALIGNED(data_size, 512))) + if (!expect(IS_ALIGNED(ds, 512))) return NULL; - /* prepare for larger trim requests. */ - if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE)) + if (trim || wsame) { + if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9))) + return NULL; + } else if (!expect(ds <= DRBD_MAX_BIO_SIZE)) return NULL; /* even though we trust our peer, * we sometimes have to double check. */ - if (sector + (data_size>>9) > capacity) { + if (sector + (ds>>9) > capacity) { drbd_err(device, "request from peer beyond end of local disk: " "capacity: %llus < sector: %llus + size: %u\n", (unsigned long long)capacity, - (unsigned long long)sector, data_size); + (unsigned long long)sector, ds); return NULL; } /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place.
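The wsame sanity checks above pin down the size relationships of the new packet types. An illustrative decode (hypothetical numbers): a P_WSAME covering 1 MiB on a backend with 4096-byte logical blocks arrives as

	data_size   = 4096;	/* payload: exactly one logical block */
	wsame->size = 1048576;	/* extent to be affected; becomes ds */

so the backend replicates that one block ds / data_size == 256 times, while a plain P_DATA write has data_size == ds, and a P_TRIM carries no payload at all (data_size == 0).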
*/ - peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO); + peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO); if (!peer_req) return NULL; peer_req->flags |= EE_WRITE; - if (trim) + if (trim) { + peer_req->flags |= EE_IS_TRIM; return peer_req; + } + if (wsame) + peer_req->flags |= EE_WRITE_SAME; + /* receive payload size bytes into page chain */ ds = data_size; page = peer_req->pages; page_chain_for_each(page) { @@ -1689,7 +1912,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, } if (digest_size) { - drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv); + drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size); if (memcmp(dig_in, dig_vv, digest_size)) { drbd_err(device, "Digest integrity check FAILED: %llus +%u\n", (unsigned long long)sector, data_size); @@ -1830,7 +2053,8 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto spin_unlock_irq(&device->resource->req_lock); atomic_add(pi->size >> 9, &device->rs_sect_ev); - if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) + if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0, + DRBD_FAULT_RS_WR) == 0) return 0; /* don't care for the reason here */ @@ -2065,13 +2289,13 @@ static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req) { struct drbd_peer_request *rs_req; - bool rv = 0; + bool rv = false; spin_lock_irq(&device->resource->req_lock); list_for_each_entry(rs_req, &device->sync_ee, w.list) { if (overlaps(peer_req->i.sector, peer_req->i.size, rs_req->i.sector, rs_req->i.size)) { - rv = 1; + rv = true; break; } } @@ -2152,12 +2376,19 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co /* see also bio_flags_to_wire() * DRBD_REQ_*, because we need to semantically map the flags to data packet * flags and back. We may replicate to other kernel versions. */ -static unsigned long wire_flags_to_bio(u32 dpf) +static unsigned long wire_flags_to_bio_flags(u32 dpf) { return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | (dpf & DP_FUA ? REQ_FUA : 0) | - (dpf & DP_FLUSH ? REQ_FLUSH : 0) | - (dpf & DP_DISCARD ? REQ_DISCARD : 0); + (dpf & DP_FLUSH ? 
REQ_PREFLUSH : 0); +} + +static unsigned long wire_flags_to_bio_op(u32 dpf) +{ + if (dpf & DP_DISCARD) + return REQ_OP_DISCARD; + else + return REQ_OP_WRITE; } static void fail_postponed_requests(struct drbd_device *device, sector_t sector, @@ -2303,7 +2534,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * struct drbd_peer_request *peer_req; struct p_data *p = pi->data; u32 peer_seq = be32_to_cpu(p->seq_num); - int rw = WRITE; + int op, op_flags; u32 dp_flags; int err, tp; @@ -2342,14 +2573,11 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * peer_req->flags |= EE_APPLICATION; dp_flags = be32_to_cpu(p->dp_flags); - rw |= wire_flags_to_bio(dp_flags); + op = wire_flags_to_bio_op(dp_flags); + op_flags = wire_flags_to_bio_flags(dp_flags); if (pi->cmd == P_TRIM) { - struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); - peer_req->flags |= EE_IS_TRIM; - if (!blk_queue_discard(q)) - peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; D_ASSERT(peer_device, peer_req->i.size > 0); - D_ASSERT(peer_device, rw & REQ_DISCARD); + D_ASSERT(peer_device, op == REQ_OP_DISCARD); D_ASSERT(peer_device, peer_req->pages == NULL); } else if (peer_req->pages == NULL) { D_ASSERT(device, peer_req->i.size == 0); @@ -2414,11 +2642,11 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * update_peer_seq(peer_device, peer_seq); spin_lock_irq(&device->resource->req_lock); } - /* if we use the zeroout fallback code, we process synchronously - * and we wait for all pending requests, respectively wait for + /* TRIM and WRITE_SAME are processed synchronously, + * we wait for all pending requests, respectively wait for * active_ee to become empty in drbd_submit_peer_request(); * better not add ourselves here. */ - if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0) + if ((peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) == 0) list_add_tail(&peer_req->w.list, &device->active_ee); spin_unlock_irq(&device->resource->req_lock); @@ -2433,7 +2661,8 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * peer_req->flags |= EE_CALL_AL_COMPLETE_IO; } - err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR); + err = drbd_submit_peer_request(device, peer_req, op, op_flags, + DRBD_FAULT_DT_WR); if (!err) return 0; @@ -2449,7 +2678,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * } out_interrupted: - drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP); + drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP); put_ldev(device); drbd_free_peer_req(device, peer_req); return err; @@ -2574,6 +2803,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet case P_DATA_REQUEST: drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p); break; + case P_RS_THIN_REQ: case P_RS_DATA_REQUEST: case P_CSUM_RS_REQUEST: case P_OV_REQUEST: @@ -2599,7 +2829,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. 
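The wire_flags_to_bio_op()/wire_flags_to_bio_flags() pair earlier in this hunk mirrors the block layer's new split between the operation and its modifier flags. A usage sketch of what receive_Data() now computes (illustrative values, not a captured packet):

	u32 dpf = DP_RW_SYNC | DP_FUA;	/* as decoded from p->dp_flags */
	int op = wire_flags_to_bio_op(dpf);		/* REQ_OP_WRITE */
	int op_flags = wire_flags_to_bio_flags(dpf);	/* REQ_SYNC | REQ_FUA */
	/* with DP_DISCARD set, op would be REQ_OP_DISCARD instead */
	err = drbd_submit_peer_request(device, peer_req, op, op_flags,
				       DRBD_FAULT_DT_WR);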
*/ peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, - true /* has real payload */, GFP_NOIO); + size, GFP_NOIO); if (!peer_req) { put_ldev(device); return -ENOMEM; } @@ -2613,6 +2843,12 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet peer_req->flags |= EE_APPLICATION; goto submit; + case P_RS_THIN_REQ: + /* If at some point in the future we have a smart way to + find out if this data block is completely deallocated, + then we would do something smarter here than reading + the block... */ + peer_req->flags |= EE_RS_THIN_REQ; case P_RS_DATA_REQUEST: peer_req->w.cb = w_e_end_rsdata_req; fault_type = DRBD_FAULT_RS_RD; @@ -2723,7 +2959,8 @@ submit_for_resync: submit: update_receiver_timing_details(connection, drbd_submit_peer_request); inc_unacked(device); - if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0) + if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0, + fault_type) == 0) return 0; /* don't care for the reason here */ @@ -2957,7 +3194,8 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid, -1091 requires proto 91 -1096 requires proto 96 */ -static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local) + +static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local) { struct drbd_peer_device *const peer_device = first_peer_device(device); struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; @@ -3037,8 +3275,39 @@ static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __m * next bit (weight 2) is set when peer was primary */ *rule_nr = 40; + /* Neither has the "crashed primary" flag set, + * only a replication link hiccup. */ + if (rct == 0) + return 0; + + /* Current UUID equal and no bitmap uuid; does not necessarily + * mean this was a "simultaneous hard crash", maybe IO was + * frozen, so no UUID-bump happened. + * This is a protocol change, overload DRBD_FF_WSAME as flag + * for "new-enough" peer DRBD version. */ + if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) { + *rule_nr = 41; + if (!(connection->agreed_features & DRBD_FF_WSAME)) { + drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n"); + return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8)); + } + if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) { + /* At least one has the "crashed primary" bit set, + * both are primary now, but neither has rotated its UUIDs? + * "Can not happen." */ + drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n"); + return -100; + } + if (device->state.role == R_PRIMARY) + return 1; + return -1; + } + + /* Both are secondary. + * Really looks like recovery from simultaneous hard crash. + * Check which had been primary before, and arbitrate.
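The negative return value in rule 41 above packs both requirements into a single integer, which drbd_sync_handshake() unpacks again below. A worked example, assuming PRO_VERSION_MAX == 101 and DRBD_FF_WSAME == 4 as in this series:

	/* encoded: -(0x10000 | 101 | (4 << 8)) == -0x10465 */
	hg = -hg;			/* 0x10465, taken when hg < -0x10000 */
	proto  = hg & 0xff;		/* 0x65 == 101: required protocol */
	fflags = (hg >> 8) & 0xff;	/* 0x04: required feature flag (WSAME) */

The 0x10000 bit keeps these encoded values well below -1000, so they cannot collide with the older "requires proto N" convention listed in the comment above drbd_uuid_compare().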
*/ switch (rct) { - case 0: /* !self_pri && !peer_pri */ return 0; + case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */ case 1: /* self_pri && !peer_pri */ return 1; case 2: /* !self_pri && peer_pri */ return -1; case 3: /* self_pri && peer_pri */ @@ -3165,7 +3434,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); - hg = drbd_uuid_compare(device, &rule_nr); + hg = drbd_uuid_compare(device, peer_role, &rule_nr); spin_unlock_irq(&device->ldev->md.uuid_lock); drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr); @@ -3174,6 +3443,15 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, drbd_alert(device, "Unrelated data, aborting!\n"); return C_MASK; } + if (hg < -0x10000) { + int proto, fflags; + hg = -hg; + proto = hg & 0xff; + fflags = (hg >> 8) & 0xff; + drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n", + proto, fflags); + return C_MASK; + } if (hg < -1000) { drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000); return C_MASK; @@ -3403,7 +3681,8 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in */ peer_integrity_tfm = crypto_alloc_ahash(integrity_alg, 0, CRYPTO_ALG_ASYNC); - if (!peer_integrity_tfm) { + if (IS_ERR(peer_integrity_tfm)) { + peer_integrity_tfm = NULL; drbd_err(connection, "peer data-integrity-alg %s not supported\n", integrity_alg); goto disconnect; @@ -3754,6 +4033,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info struct drbd_peer_device *peer_device; struct drbd_device *device; struct p_sizes *p = pi->data; + struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL; enum determine_dev_size dd = DS_UNCHANGED; sector_t p_size, p_usize, p_csize, my_usize; int ldsc = 0; /* local disk size changed */ @@ -3773,6 +4053,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info device->p_size = p_size; if (get_ldev(device)) { + sector_t new_size, cur_size; rcu_read_lock(); my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size; rcu_read_unlock(); @@ -3789,11 +4070,13 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info /* Never shrink a device with usable data during connect. But allow online shrinking if we are connected. */ - if (drbd_new_dev_size(device, device->ldev, p_usize, 0) < - drbd_get_capacity(device->this_bdev) && + new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0); + cur_size = drbd_get_capacity(device->this_bdev); + if (new_size < cur_size && device->state.disk >= D_OUTDATED && device->state.conn < C_CONNECTED) { - drbd_err(device, "The peer's disk size is too small!\n"); + drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n", + (unsigned long long)new_size, (unsigned long long)cur_size); conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); put_ldev(device); return -EIO; @@ -3827,14 +4110,14 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info } device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); - /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size(). + /* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size(). 
In case we cleared the QUEUE_FLAG_DISCARD from our queue in - drbd_reconsider_max_bio_size(), we can be sure that after + drbd_reconsider_queue_parameters(), we can be sure that after drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */ ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(device)) { - drbd_reconsider_max_bio_size(device, device->ldev); + drbd_reconsider_queue_parameters(device, device->ldev, o); dd = drbd_determine_dev_size(device, ddsf, NULL); put_ldev(device); if (dd == DS_ERROR) @@ -3854,7 +4137,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info * However, if he sends a zero current size, * take his (user-capped or) backing disk size anyways. */ - drbd_reconsider_max_bio_size(device, NULL); + drbd_reconsider_queue_parameters(device, NULL, o); drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); } @@ -4587,9 +4870,75 @@ static int receive_out_of_sync(struct drbd_connection *connection, struct packet return 0; } +static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi) +{ + struct drbd_peer_device *peer_device; + struct p_block_desc *p = pi->data; + struct drbd_device *device; + sector_t sector; + int size, err = 0; + + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) + return -EIO; + device = peer_device->device; + + sector = be64_to_cpu(p->sector); + size = be32_to_cpu(p->blksize); + + dec_rs_pending(device); + + if (get_ldev(device)) { + struct drbd_peer_request *peer_req; + const int op = REQ_OP_DISCARD; + + peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector, + size, 0, GFP_NOIO); + if (!peer_req) { + put_ldev(device); + return -ENOMEM; + } + + peer_req->w.cb = e_end_resync_block; + peer_req->submit_jif = jiffies; + peer_req->flags |= EE_IS_TRIM; + + spin_lock_irq(&device->resource->req_lock); + list_add_tail(&peer_req->w.list, &device->sync_ee); + spin_unlock_irq(&device->resource->req_lock); + + atomic_add(pi->size >> 9, &device->rs_sect_ev); + err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR); + + if (err) { + spin_lock_irq(&device->resource->req_lock); + list_del(&peer_req->w.list); + spin_unlock_irq(&device->resource->req_lock); + + drbd_free_peer_req(device, peer_req); + put_ldev(device); + err = 0; + goto fail; + } + + inc_unacked(device); + + /* No put_ldev() here. 
Gets called in drbd_endio_write_sec_final(), + as well as drbd_rs_complete_io() */ + } else { + fail: + drbd_rs_complete_io(device, sector); + drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER); + } + + atomic_add(size >> 9, &device->rs_sect_in); + + return err; +} + struct data_cmd { int expect_payload; - size_t pkt_size; + unsigned int pkt_size; int (*fn)(struct drbd_connection *, struct packet_info *); }; @@ -4614,11 +4963,14 @@ static struct data_cmd drbd_cmd_handler[] = { [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, + [P_RS_THIN_REQ] = { 0, sizeof(struct p_block_req), receive_DataRequest }, [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data }, + [P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated }, + [P_WSAME] = { 1, sizeof(struct p_wsame), receive_Data }, }; static void drbdd(struct drbd_connection *connection) @@ -4628,7 +4980,7 @@ static void drbdd(struct drbd_connection *connection) int err; while (get_t_state(&connection->receiver) == RUNNING) { - struct data_cmd *cmd; + struct data_cmd const *cmd; drbd_thread_current_set_cpu(&connection->receiver); update_receiver_timing_details(connection, drbd_recv_header); @@ -4643,11 +4995,18 @@ static void drbdd(struct drbd_connection *connection) } shs = cmd->pkt_size; + if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME) + shs += sizeof(struct o_qlim); if (pi.size > shs && !cmd->expect_payload) { drbd_err(connection, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size); goto err_out; } + if (pi.size < shs) { + drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n", + cmdname(pi.cmd), (int)shs, pi.size); + goto err_out; + } if (shs) { update_receiver_timing_details(connection, drbd_recv_all_warn); @@ -4783,9 +5142,11 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device) drbd_md_sync(device); - /* serialize with bitmap writeout triggered by the state change, - * if any. */ - wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); + if (get_ldev(device)) { + drbd_bitmap_io(device, &drbd_bm_write_copy_pages, + "write from disconnected", BM_LOCKED_CHANGE_ALLOWED); + put_ldev(device); + } /* tcp_close and release of sendpage pages can be deferred. I don't * want to use SO_LINGER, because apparently it can be deferred for @@ -4892,8 +5253,12 @@ static int drbd_do_features(struct drbd_connection *connection) drbd_info(connection, "Handshake successful: " "Agreed network protocol version %d\n", connection->agreed_pro_version); - drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n", - connection->agreed_features & FF_TRIM ? " " : " not "); + drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s.\n", + connection->agreed_features, + connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "", + connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "", + connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : + connection->agreed_features ? 
"" : " none"); return 1; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 2255dcfebd2b..66b8e4bb74d8 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -47,8 +47,7 @@ static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *r &device->vdisk->part0, req->start_jif); } -static struct drbd_request *drbd_req_new(struct drbd_device *device, - struct bio *bio_src) +static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio *bio_src) { struct drbd_request *req; @@ -58,10 +57,12 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, memset(req, 0, sizeof(*req)); drbd_req_make_private_bio(req, bio_src); - req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0; - req->device = device; - req->master_bio = bio_src; - req->epoch = 0; + req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0) + | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0) + | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0); + req->device = device; + req->master_bio = bio_src; + req->epoch = 0; drbd_clear_interval(&req->i); req->i.sector = bio_src->bi_iter.bi_sector; @@ -218,7 +219,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) { const unsigned s = req->rq_state; struct drbd_device *device = req->device; - int rw; int error, ok; /* we must not complete the master bio, while it is @@ -242,8 +242,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) return; } - rw = bio_rw(req->master_bio); - /* * figure out whether to report success or failure. * @@ -267,7 +265,7 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) * epoch number. If they match, increase the current_tle_nr, * and reset the transfer log epoch write_cnt. */ - if (rw == WRITE && + if (op_is_write(bio_op(req->master_bio)) && req->epoch == atomic_read(&first_peer_device(device)->connection->current_tle_nr)) start_new_tl_epoch(first_peer_device(device)->connection); @@ -284,11 +282,14 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) * because no path was available, in which case * it was not even added to the transfer_log. * - * READA may fail, and will not be retried. + * read-ahead may fail, and will not be retried. * * WRITE should have used all available paths already. */ - if (!ok && rw == READ && !list_empty(&req->tl_requests)) + if (!ok && + bio_op(req->master_bio) == REQ_OP_READ && + !(req->master_bio->bi_rw & REQ_RAHEAD) && + !list_empty(&req->tl_requests)) req->rq_state |= RQ_POSTPONED; if (!(req->rq_state & RQ_POSTPONED)) { @@ -644,7 +645,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, __drbd_chk_io_error(device, DRBD_READ_ERROR); /* fall through. */ case READ_AHEAD_COMPLETED_WITH_ERROR: - /* it is legal to fail READA, no __drbd_chk_io_error in that case. */ + /* it is legal to fail read-ahead, no __drbd_chk_io_error in that case. */ mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); break; @@ -656,7 +657,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case QUEUE_FOR_NET_READ: - /* READ or READA, and + /* READ, and * no local disk, * or target area marked as invalid, * or just got an io-error. 
*/ @@ -977,16 +978,20 @@ static void complete_conflicting_writes(struct drbd_request *req) sector_t sector = req->i.sector; int size = req->i.size; - i = drbd_find_overlap(&device->write_requests, sector, size); - if (!i) - return; - for (;;) { - prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE); - i = drbd_find_overlap(&device->write_requests, sector, size); - if (!i) + drbd_for_each_overlap(i, &device->write_requests, sector, size) { + /* Ignore, if already completed to upper layers. */ + if (i->completed) + continue; + /* Handle the first found overlap. After the schedule + * we have to restart the tree walk. */ + break; + } + if (!i) /* if any */ break; + /* Indicate to wake up device->misc_wait on progress. */ + prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE); i->waiting = true; spin_unlock_irq(&device->resource->req_lock); schedule(); @@ -995,7 +1000,7 @@ static void complete_conflicting_writes(struct drbd_request *req) finish_wait(&device->misc_wait, &wait); } -/* called within req_lock and rcu_read_lock() */ +/* called within req_lock */ static void maybe_pull_ahead(struct drbd_device *device) { struct drbd_connection *connection = first_peer_device(device)->connection; @@ -1132,7 +1137,7 @@ static int drbd_process_write_request(struct drbd_request *req) * replicating, in which case there is no point. */ if (unlikely(req->i.size == 0)) { /* The only size==0 bios we expect are empty flushes. */ - D_ASSERT(device, req->master_bio->bi_rw & REQ_FLUSH); + D_ASSERT(device, req->master_bio->bi_rw & REQ_PREFLUSH); if (remote) _req_mod(req, QUEUE_AS_DRBD_BARRIER); return remote; @@ -1152,12 +1157,29 @@ static int drbd_process_write_request(struct drbd_request *req) return remote; } +static void drbd_process_discard_req(struct drbd_request *req) +{ + int err = drbd_issue_discard_or_zero_out(req->device, + req->i.sector, req->i.size >> 9, true); + + if (err) + req->private_bio->bi_error = -EIO; + bio_endio(req->private_bio); +} + static void drbd_submit_req_private_bio(struct drbd_request *req) { struct drbd_device *device = req->device; struct bio *bio = req->private_bio; - const int rw = bio_rw(bio); + unsigned int type; + + if (bio_op(bio) != REQ_OP_READ) + type = DRBD_FAULT_DT_WR; + else if (bio->bi_rw & REQ_RAHEAD) + type = DRBD_FAULT_DT_RA; + else + type = DRBD_FAULT_DT_RD; bio->bi_bdev = device->ldev->backing_bdev; @@ -1167,11 +1189,10 @@ drbd_submit_req_private_bio(struct drbd_request *req) * stable storage, and this is a WRITE, we may not even submit * this bio. */ if (get_ldev(device)) { - if (drbd_insert_fault(device, - rw == WRITE ? DRBD_FAULT_DT_WR - : rw == READ ? 
DRBD_FAULT_DT_RD - : DRBD_FAULT_DT_RA)) + if (drbd_insert_fault(device, type)) bio_io_error(bio); + else if (bio_op(bio) == REQ_OP_DISCARD) + drbd_process_discard_req(req); else generic_make_request(bio); put_ldev(device); @@ -1223,24 +1244,45 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long /* Update disk stats */ _drbd_start_io_acct(device, req); + /* process discards always from our submitter thread */ + if (bio_op(bio) & REQ_OP_DISCARD) + goto queue_for_submitter_thread; + if (rw == WRITE && req->private_bio && req->i.size && !test_bit(AL_SUSPENDED, &device->flags)) { - if (!drbd_al_begin_io_fastpath(device, &req->i)) { - atomic_inc(&device->ap_actlog_cnt); - drbd_queue_write(device, req); - return NULL; - } + if (!drbd_al_begin_io_fastpath(device, &req->i)) + goto queue_for_submitter_thread; req->rq_state |= RQ_IN_ACT_LOG; req->in_actlog_jif = jiffies; } - return req; + + queue_for_submitter_thread: + atomic_inc(&device->ap_actlog_cnt); + drbd_queue_write(device, req); + return NULL; +} + +/* Require at least one path to current data. + * We don't want to allow writes on C_STANDALONE D_INCONSISTENT: + * We would not allow to read what was written, + * we would not have bumped the data generation uuids, + * we would cause data divergence for all the wrong reasons. + * + * If we don't see at least one D_UP_TO_DATE, we will fail this request, + * which either returns EIO, or, if OND_SUSPEND_IO is set, suspends IO, + * and queues for retry later. + */ +static bool may_do_writes(struct drbd_device *device) +{ + const union drbd_dev_state s = device->state; + return s.disk == D_UP_TO_DATE || s.pdsk == D_UP_TO_DATE; } static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) { struct drbd_resource *resource = device->resource; - const int rw = bio_rw(req->master_bio); + const int rw = bio_data_dir(req->master_bio); struct bio_and_error m = { NULL, }; bool no_remote = false; bool submit_private_bio = false; @@ -1270,7 +1312,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request goto out; } - /* We fail READ/READA early, if we can not serve it. + /* We fail READ early, if we can not serve it. * We must do this before req is registered on any lists. * Otherwise, drbd_req_complete() will queue failed READ for retry. */ if (rw != WRITE) { @@ -1291,6 +1333,12 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request } if (rw == WRITE) { + if (req->private_bio && !may_do_writes(device)) { + bio_put(req->private_bio); + req->private_bio = NULL; + put_ldev(device); + goto nodata; + } if (!drbd_process_write_request(req)) no_remote = true; } else { diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index bb2ef78165e5..eb49e7f2da91 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -206,6 +206,8 @@ enum drbd_req_state_bits { /* Set when this is a write, clear for a read */ __RQ_WRITE, + __RQ_WSAME, + __RQ_UNMAP, /* Should call drbd_al_complete_io() for this request... 
*/ __RQ_IN_ACT_LOG, @@ -241,10 +243,11 @@ enum drbd_req_state_bits { #define RQ_NET_OK (1UL << __RQ_NET_OK) #define RQ_NET_SIS (1UL << __RQ_NET_SIS) -/* 0x1f8 */ #define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK) #define RQ_WRITE (1UL << __RQ_WRITE) +#define RQ_WSAME (1UL << __RQ_WSAME) +#define RQ_UNMAP (1UL << __RQ_UNMAP) #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) #define RQ_POSTPONED (1UL << __RQ_POSTPONED) #define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 5a7ef7873b67..eea0c4aec978 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -814,7 +814,7 @@ is_valid_state(struct drbd_device *device, union drbd_state ns) } if (rv <= 0) - /* already found a reason to abort */; + goto out; /* already found a reason to abort */ else if (ns.role == R_SECONDARY && device->open_cnt) rv = SS_DEVICE_IN_USE; @@ -862,6 +862,7 @@ is_valid_state(struct drbd_device *device, union drbd_state ns) else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) rv = SS_CONNECTED_OUTDATES; +out: rcu_read_unlock(); return rv; @@ -906,6 +907,15 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_c (ns.conn >= C_CONNECTED && os.conn == C_WF_REPORT_PARAMS))) rv = SS_IN_TRANSIENT_STATE; + /* Do not promote during resync handshake triggered by "force primary". + * This is a hack. It should really be rejected by the peer during the + * cluster wide state change request. */ + if (os.role != R_PRIMARY && ns.role == R_PRIMARY + && ns.pdsk == D_UP_TO_DATE + && ns.disk != D_UP_TO_DATE && ns.disk != D_DISKLESS + && (ns.conn <= C_WF_SYNC_UUID || ns.conn != os.conn)) + rv = SS_IN_TRANSIENT_STATE; + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) rv = SS_NEED_CONNECTION; @@ -1628,6 +1638,26 @@ static void broadcast_state_change(struct drbd_state_change *state_change) #undef REMEMBER_STATE_CHANGE } +/* takes old and new peer disk state */ +static bool lost_contact_to_peer_data(enum drbd_disk_state os, enum drbd_disk_state ns) +{ + if ((os >= D_INCONSISTENT && os != D_UNKNOWN && os != D_OUTDATED) + && (ns < D_INCONSISTENT || ns == D_UNKNOWN || ns == D_OUTDATED)) + return true; + + /* Scenario, starting with normal operation + * Connected Primary/Secondary UpToDate/UpToDate + * NetworkFailure Primary/Unknown UpToDate/DUnknown (frozen) + * ... + * Connected Primary/Secondary UpToDate/Diskless (resumed; needs to bump uuid!) + */ + if (os == D_UNKNOWN + && (ns == D_DISKLESS || ns == D_FAILED || ns == D_OUTDATED)) + return true; + + return false; +} + /** * after_state_ch() - Perform after state change actions that may sleep * @device: DRBD device. @@ -1675,7 +1705,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, what = RESEND; if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && - conn_lowest_disk(connection) > D_NEGOTIATING) + conn_lowest_disk(connection) == D_UP_TO_DATE) what = RESTART_FROZEN_DISK_IO; if (resource->susp_nod && what != NOTHING) { @@ -1699,6 +1729,13 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, idr_for_each_entry(&connection->peer_devices, peer_device, vnr) clear_bit(NEW_CUR_UUID, &peer_device->device->flags); rcu_read_unlock(); + + /* We should actively create a new uuid, _before_ + * we resume/resend, if the peer is diskless + * (recovery from a multiple error scenario).
+ * Currently, this happens with a slight delay + * below when checking lost_contact_to_peer_data() ... + */ _tl_restart(connection, RESEND); _conn_request_state(connection, (union drbd_state) { { .susp_fen = 1 } }, @@ -1742,12 +1779,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, BM_LOCKED_TEST_ALLOWED); /* Lost contact to peer's copy of the data */ - if ((os.pdsk >= D_INCONSISTENT && - os.pdsk != D_UNKNOWN && - os.pdsk != D_OUTDATED) - && (ns.pdsk < D_INCONSISTENT || - ns.pdsk == D_UNKNOWN || - ns.pdsk == D_OUTDATED)) { + if (lost_contact_to_peer_data(os.pdsk, ns.pdsk)) { if (get_ldev(device)) { if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { @@ -1934,12 +1966,17 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* This triggers bitmap writeout of potentially still unwritten pages * if the resync finished cleanly, or aborted because of peer disk - * failure, or because of connection loss. + * failure, or on transition from resync back to AHEAD/BEHIND. + * + * Connection loss is handled in drbd_disconnected() by the receiver. + * * For resync aborted because of local disk failure, we cannot do * any bitmap writeout anymore. + * * No harm done if some bits change during this phase. */ - if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(device)) { + if ((os.conn > C_CONNECTED && os.conn < C_AHEAD) && + (ns.conn == C_CONNECTED || ns.conn >= C_AHEAD) && get_ldev(device)) { drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL, "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED); put_ldev(device); @@ -2160,9 +2197,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union ns.disk = os.disk; rv = _drbd_set_state(device, ns, flags, NULL); - if (rv < SS_SUCCESS) - BUG(); - + BUG_ON(rv < SS_SUCCESS); ns.i = device->state.i; ns_max.role = max_role(ns.role, ns_max.role); ns_max.peer = max_role(ns.peer, ns_max.peer); diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index bd989536f888..6c9d5d4a8a75 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -140,7 +140,7 @@ extern void drbd_resume_al(struct drbd_device *device); extern bool conn_all_vols_unconf(struct drbd_connection *connection); /** - * drbd_request_state() - Reqest a state change + * drbd_request_state() - Request a state change * @device: DRBD device. * @mask: mask of state bits to change. * @val: value of new state bits. 
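Spelled out, the widened bitmap-writeout filter in after_state_ch() above (old connection state strictly between C_CONNECTED and C_AHEAD, new state either C_CONNECTED or at least C_AHEAD) behaves as follows for some sample transitions, relying on the usual drbd_conns enum ordering:

	/* C_SYNC_SOURCE   -> C_CONNECTED      : writeout (resync finished)
	 * C_SYNC_TARGET   -> C_BEHIND         : writeout (back to ahead/behind)
	 * C_PAUSED_SYNC_S -> C_AHEAD          : writeout
	 * C_SYNC_SOURCE   -> C_DISCONNECTING  : no writeout here; connection
	 *                    loss is handled by drbd_disconnected() instead
	 */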
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index 80b0f63c7075..0eeab14776e9 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -26,7 +26,7 @@ #include <linux/drbd.h> #include "drbd_strings.h" -static const char *drbd_conn_s_names[] = { +static const char * const drbd_conn_s_names[] = { [C_STANDALONE] = "StandAlone", [C_DISCONNECTING] = "Disconnecting", [C_UNCONNECTED] = "Unconnected", @@ -53,13 +53,13 @@ static const char *drbd_conn_s_names[] = { [C_BEHIND] = "Behind", }; -static const char *drbd_role_s_names[] = { +static const char * const drbd_role_s_names[] = { [R_PRIMARY] = "Primary", [R_SECONDARY] = "Secondary", [R_UNKNOWN] = "Unknown" }; -static const char *drbd_disk_s_names[] = { +static const char * const drbd_disk_s_names[] = { [D_DISKLESS] = "Diskless", [D_ATTACHING] = "Attaching", [D_FAILED] = "Failed", @@ -71,7 +71,7 @@ static const char *drbd_disk_s_names[] = { [D_UP_TO_DATE] = "UpToDate", }; -static const char *drbd_state_sw_errors[] = { +static const char * const drbd_state_sw_errors[] = { [-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config", [-SS_NO_UP_TO_DATE_DISK] = "Need access to UpToDate data", [-SS_NO_LOCAL_DISK] = "Can not resync without local disk", diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 4d87499f0d54..35dbb3dca47e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -173,8 +173,8 @@ void drbd_peer_request_endio(struct bio *bio) { struct drbd_peer_request *peer_req = bio->bi_private; struct drbd_device *device = peer_req->peer_device->device; - int is_write = bio_data_dir(bio) == WRITE; - int is_discard = !!(bio->bi_rw & REQ_DISCARD); + bool is_write = bio_data_dir(bio) == WRITE; + bool is_discard = !!(bio_op(bio) == REQ_OP_DISCARD); if (bio->bi_error && __ratelimit(&drbd_ratelimit_state)) drbd_warn(device, "%s: error=%d s=%llus\n", @@ -248,18 +248,26 @@ void drbd_request_endio(struct bio *bio) /* to avoid recursion in __req_mod */ if (unlikely(bio->bi_error)) { - if (bio->bi_rw & REQ_DISCARD) - what = (bio->bi_error == -EOPNOTSUPP) - ? DISCARD_COMPLETED_NOTSUPP - : DISCARD_COMPLETED_WITH_ERROR; - else - what = (bio_data_dir(bio) == WRITE) - ? WRITE_COMPLETED_WITH_ERROR - : (bio_rw(bio) == READ) - ? READ_COMPLETED_WITH_ERROR - : READ_AHEAD_COMPLETED_WITH_ERROR; - } else + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + if (bio->bi_error == -EOPNOTSUPP) + what = DISCARD_COMPLETED_NOTSUPP; + else + what = DISCARD_COMPLETED_WITH_ERROR; + break; + case REQ_OP_READ: + if (bio->bi_rw & REQ_RAHEAD) + what = READ_AHEAD_COMPLETED_WITH_ERROR; + else + what = READ_COMPLETED_WITH_ERROR; + break; + default: + what = WRITE_COMPLETED_WITH_ERROR; + break; + } + } else { what = COMPLETED_OK; + } bio_put(req->private_bio); req->private_bio = ERR_PTR(bio->bi_error); @@ -320,6 +328,10 @@ void drbd_csum_bio(struct crypto_ahash *tfm, struct bio *bio, void *digest) sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); ahash_request_set_crypt(req, &sg, NULL, sg.length); crypto_ahash_update(req); + /* REQ_OP_WRITE_SAME has only one segment, + * checksum the payload only once. */ + if (bio_op(bio) == REQ_OP_WRITE_SAME) + break; } ahash_request_set_crypt(req, NULL, digest, 0); crypto_ahash_final(req); @@ -387,7 +399,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, /* GFP_TRY, because if there is no memory available right now, this may * be rescheduled for later.
It is "only" background resync, after all. */ peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, - size, true /* has real payload */, GFP_TRY); + size, size, GFP_TRY); if (!peer_req) goto defer; @@ -397,7 +409,8 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, spin_unlock_irq(&device->resource->req_lock); atomic_add(size >> 9, &device->rs_sect_ev); - if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0) + if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0, + DRBD_FAULT_RS_RD) == 0) return 0; /* If it failed because of ENOMEM, retry should help. If it failed @@ -582,6 +595,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel) int number, rollback_i, size; int align, requeue = 0; int i = 0; + int discard_granularity = 0; if (unlikely(cancel)) return 0; @@ -601,6 +615,12 @@ static int make_resync_request(struct drbd_device *const device, int cancel) return 0; } + if (connection->agreed_features & DRBD_FF_THIN_RESYNC) { + rcu_read_lock(); + discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity; + rcu_read_unlock(); + } + max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; number = drbd_rs_number_requests(device); if (number <= 0) @@ -665,6 +685,9 @@ next_sector: if (sector & ((1<<(align+3))-1)) break; + if (discard_granularity && size == discard_granularity) + break; + /* do not cross extent boundaries */ if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0) break; @@ -711,7 +734,8 @@ next_sector: int err; inc_rs_pending(device); - err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST, + err = drbd_send_drequest(peer_device, + size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST, sector, size, ID_SYNCER); if (err) { drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); @@ -828,6 +852,7 @@ static void ping_peer(struct drbd_device *device) int drbd_resync_finished(struct drbd_device *device) { + struct drbd_connection *connection = first_peer_device(device)->connection; unsigned long db, dt, dbdt; unsigned long n_oos; union drbd_state os, ns; @@ -849,8 +874,7 @@ int drbd_resync_finished(struct drbd_device *device) if (dw) { dw->w.cb = w_resync_finished; dw->device = device; - drbd_queue_work(&first_peer_device(device)->connection->sender_work, - &dw->w); + drbd_queue_work(&connection->sender_work, &dw->w); return 1; } drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n"); @@ -963,6 +987,30 @@ int drbd_resync_finished(struct drbd_device *device) _drbd_set_state(device, ns, CS_VERBOSE, NULL); out_unlock: spin_unlock_irq(&device->resource->req_lock); + + /* If we have been sync source, and have an effective fencing-policy, + * once *all* volumes are back in sync, call "unfence". 
*/ + if (os.conn == C_SYNC_SOURCE) { + enum drbd_disk_state disk_state = D_MASK; + enum drbd_disk_state pdsk_state = D_MASK; + enum drbd_fencing_p fp = FP_DONT_CARE; + + rcu_read_lock(); + fp = rcu_dereference(device->ldev->disk_conf)->fencing; + if (fp != FP_DONT_CARE) { + struct drbd_peer_device *peer_device; + int vnr; + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk); + pdsk_state = min_t(enum drbd_disk_state, pdsk_state, device->state.pdsk); + } + } + rcu_read_unlock(); + if (disk_state == D_UP_TO_DATE && pdsk_state == D_UP_TO_DATE) + conn_khelper(connection, "unfence-peer"); + } + put_ldev(device); out: device->rs_total = 0; @@ -999,7 +1047,6 @@ static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_ /** * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST - * @device: DRBD device. * @w: work object. * @cancel: The connection will be closed anyways */ @@ -1035,6 +1082,30 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) return err; } +static bool all_zero(struct drbd_peer_request *peer_req) +{ + struct page *page = peer_req->pages; + unsigned int len = peer_req->i.size; + + page_chain_for_each(page) { + unsigned int l = min_t(unsigned int, len, PAGE_SIZE); + unsigned int i, words = l / sizeof(long); + unsigned long *d; + + d = kmap_atomic(page); + for (i = 0; i < words; i++) { + if (d[i]) { + kunmap_atomic(d); + return false; + } + } + kunmap_atomic(d); + len -= l; + } + + return true; +} + /** * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST * @w: work object. @@ -1063,7 +1134,10 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { if (likely(device->state.pdsk >= D_INCONSISTENT)) { inc_rs_pending(device); - err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); + if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req)) + err = drbd_send_rs_deallocated(peer_device, peer_req); + else + err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); } else { if (__ratelimit(&drbd_ratelimit_state)) drbd_err(device, "Not sending RSDataReply, " @@ -1633,7 +1707,7 @@ static bool use_checksum_based_resync(struct drbd_connection *connection, struct rcu_read_unlock(); return connection->agreed_pro_version >= 89 && /* supported? */ connection->csums_tfm && /* configured? */ - (csums_after_crash_only == 0 /* use for each resync? */ + (csums_after_crash_only == false /* use for each resync? */ || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? 
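Putting the new thin-resync pieces together (P_RS_THIN_REQ generation in make_resync_request(), all_zero() and the deallocated reply in w_e_end_rsdata_req(), and receive_rs_deallocated() on the requesting side), the round trip looks roughly like this (protocol sketch, not literal code):

	/* resync target                          resync source
	 *  -- P_RS_THIN_REQ (rs_discard_granularity-sized block) -->
	 *                                        read block from local disk
	 *                                        all_zero(peer_req)?
	 *  <-- P_RS_DEALLOCATED                    yes: no payload sent
	 *  <-- P_RS_DATA_REPLY + data              no:  as for normal resync
	 *  on P_RS_DEALLOCATED: submit a local REQ_OP_DISCARD (EE_IS_TRIM)
	 *  instead of writing zeroes received over the wire
	 */

This is also why make_resync_request() stops growing a request exactly at the configured discard granularity: only a block that maps 1:1 onto a discardable unit can be punched out on the requesting side.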
*/ } @@ -1768,7 +1842,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) device->bm_resync_fo = 0; device->use_csums = use_checksum_based_resync(connection, device); } else { - device->use_csums = 0; + device->use_csums = false; } /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 84708a5f8c52..c557057fe8ae 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3822,8 +3822,9 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) bio.bi_flags |= (1 << BIO_QUIET); bio.bi_private = &cbdata; bio.bi_end_io = floppy_rb0_cb; + bio_set_op_attrs(&bio, REQ_OP_READ, 0); - submit_bio(READ, &bio); + submit_bio(&bio); process_fd_request(); init_completion(&cbdata.complete); @@ -4349,8 +4350,7 @@ static int __init do_floppy_init(void) /* to be cleaned up... */ disks[drive]->private_data = (void *)(long)drive; disks[drive]->flags |= GENHD_FL_REMOVABLE; - disks[drive]->driverfs_dev = &floppy_device[drive].dev; - add_disk(disks[drive]); + device_add_disk(&floppy_device[drive].dev, disks[drive]); } return 0; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1fa8cc235977..075377eee0c0 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -447,7 +447,7 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq) static inline void handle_partial_read(struct loop_cmd *cmd, long bytes) { - if (bytes < 0 || (cmd->rq->cmd_flags & REQ_WRITE)) + if (bytes < 0 || op_is_write(req_op(cmd->rq))) return; if (unlikely(bytes < blk_rq_bytes(cmd->rq))) { @@ -541,10 +541,10 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; - if (rq->cmd_flags & REQ_WRITE) { - if (rq->cmd_flags & REQ_FLUSH) + if (op_is_write(req_op(rq))) { + if (req_op(rq) == REQ_OP_FLUSH) ret = lo_req_flush(lo, rq); - else if (rq->cmd_flags & REQ_DISCARD) + else if (req_op(rq) == REQ_OP_DISCARD) ret = lo_discard(lo, rq, pos); else if (lo->transfer) ret = lo_write_transfer(lo, rq, pos); @@ -1659,8 +1659,8 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (lo->lo_state != Lo_bound) return -EIO; - if (lo->use_dio && !(cmd->rq->cmd_flags & (REQ_FLUSH | - REQ_DISCARD))) + if (lo->use_dio && (req_op(cmd->rq) != REQ_OP_FLUSH && + req_op(cmd->rq) != REQ_OP_DISCARD)) cmd->use_aio = true; else cmd->use_aio = false; @@ -1672,7 +1672,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, static void loop_handle_cmd(struct loop_cmd *cmd) { - const bool write = cmd->rq->cmd_flags & REQ_WRITE; + const bool write = op_is_write(req_op(cmd->rq)); struct loop_device *lo = cmd->rq->q->queuedata; int ret = 0; @@ -1765,6 +1765,7 @@ static int loop_add(struct loop_device **l, int i) */ queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); + err = -ENOMEM; disk = lo->lo_disk = alloc_disk(1 << part_shift); if (!disk) goto out_free_queue; diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 145ce2aa2e78..e937fcf71769 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -687,15 +687,13 @@ static unsigned int mg_issue_req(struct request *req, unsigned int sect_num, unsigned int sect_cnt) { - switch (rq_data_dir(req)) { - case READ: + if (rq_data_dir(req) == READ) { if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) != MG_ERR_NONE) { mg_bad_rw_intr(host); return host->error; } - break; - case WRITE: + } else { /* TODO : handler */ outb(ATA_NIEN, (unsigned
long)host->dev_base + MG_REG_DRV_CTRL); if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr) @@ -714,7 +712,6 @@ static unsigned int mg_issue_req(struct request *req, mod_timer(&host->timer, jiffies + 3 * HZ); outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); - break; } return MG_ERR_NONE; } @@ -1018,7 +1015,7 @@ probe_err_7: probe_err_6: blk_cleanup_queue(host->breq); probe_err_5: - unregister_blkdev(MG_DISK_MAJ, MG_DISK_NAME); + unregister_blkdev(host->major, MG_DISK_NAME); probe_err_4: if (!prv_data->use_polling) free_irq(host->irq, host); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 6053e4659fa2..2aca98e8e427 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3765,7 +3765,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq) return -ENODATA; } - if (rq->cmd_flags & REQ_DISCARD) { + if (req_op(rq) == REQ_OP_DISCARD) { int err; err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); @@ -3956,7 +3956,6 @@ static int mtip_block_initialize(struct driver_data *dd) if (rv) goto disk_index_error; - dd->disk->driverfs_dev = &dd->pdev->dev; dd->disk->major = dd->major; dd->disk->first_minor = index * MTIP_MAX_MINORS; dd->disk->minors = MTIP_MAX_MINORS; @@ -4008,7 +4007,7 @@ skip_create_disk: /* * if rebuild pending, start the service thread, and delay the block - * queue creation and add_disk() + * queue creation and device_add_disk() */ if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) goto start_service_thread; @@ -4042,7 +4041,7 @@ skip_create_disk: set_capacity(dd->disk, capacity); /* Enable the block device and add it to /dev */ - add_disk(dd->disk); + device_add_disk(&dd->pdev->dev, dd->disk); dd->bdev = bdget_disk(dd->disk, 0); /* diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 6a48ed41963f..6f55b262b5ce 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -282,9 +282,9 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) if (req->cmd_type == REQ_TYPE_DRV_PRIV) type = NBD_CMD_DISC; - else if (req->cmd_flags & REQ_DISCARD) + else if (req_op(req) == REQ_OP_DISCARD) type = NBD_CMD_TRIM; - else if (req->cmd_flags & REQ_FLUSH) + else if (req_op(req) == REQ_OP_FLUSH) type = NBD_CMD_FLUSH; else if (rq_data_dir(req) == WRITE) type = NBD_CMD_WRITE; diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index cab97593ba54..75a7f88d6717 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -448,7 +448,7 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) struct request *rq; struct bio *bio = rqd->bio; - rq = blk_mq_alloc_request(q, bio_rw(bio), 0); + rq = blk_mq_alloc_request(q, bio_data_dir(bio), 0); if (IS_ERR(rq)) return -ENOMEM; diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index c2854a2bfdb0..92900f5f0b47 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -321,7 +321,7 @@ static void osdblk_rq_fn(struct request_queue *q) * driver-specific, etc. 
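The driver hunks above and below all apply the same mechanical conversion: the operation moved out of the cmd_flags bitmask into a dedicated field, so bit tests become equality tests. Schematically (illustrative, not taken from one specific driver):

	/* before: operations were flag bits, and could be or'ed/tested together
	 *   if (rq->cmd_flags & REQ_DISCARD) ...
	 *   if (rq->cmd_flags & REQ_FLUSH)   ...
	 * after: exactly one REQ_OP_* value per request
	 *   if (req_op(rq) == REQ_OP_DISCARD) ...
	 *   if (req_op(rq) == REQ_OP_FLUSH)   ...
	 * write-ness is now derived from the op:
	 *   if (op_is_write(req_op(rq))) ...
	 */

The loop_queue_rq() hunk shows why such conversions need care: a negated mask test like !(flags & (REQ_FLUSH | REQ_DISCARD)) must become a conjunction of two inequalities, per De Morgan, not a disjunction.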
*/ - do_flush = rq->cmd_flags & REQ_FLUSH; + do_flush = (req_op(rq) == REQ_OP_FLUSH); do_write = (rq_data_dir(rq) == WRITE); if (!do_flush) { /* osd_flush does not use a bio */ diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index d06c62eccdf0..9393bc730acf 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1074,7 +1074,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) BUG(); atomic_inc(&pkt->io_wait); - bio->bi_rw = READ; + bio_set_op_attrs(bio, REQ_OP_READ, 0); pkt_queue_bio(pd, bio); frames_read++; } @@ -1336,7 +1336,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) /* Start the write request */ atomic_set(&pkt->io_wait, 1); - pkt->w_bio->bi_rw = WRITE; + bio_set_op_attrs(pkt->w_bio, REQ_OP_WRITE, 0); pkt_queue_bio(pd, pkt->w_bio); } diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 4b7e405830d7..76f33c84ce3d 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -196,7 +196,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev, dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); while ((req = blk_fetch_request(q))) { - if (req->cmd_flags & REQ_FLUSH) { + if (req_op(req) == REQ_OP_FLUSH) { if (ps3disk_submit_flush_request(dev, req)) break; } else if (req->cmd_type == REQ_TYPE_FS) { @@ -256,7 +256,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) return IRQ_HANDLED; } - if (req->cmd_flags & REQ_FLUSH) { + if (req_op(req) == REQ_OP_FLUSH) { read = 0; op = "flush"; } else { @@ -487,7 +487,6 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) gendisk->fops = &ps3disk_fops; gendisk->queue = queue; gendisk->private_data = dev; - gendisk->driverfs_dev = &dev->sbd.core; snprintf(gendisk->disk_name, sizeof(gendisk->disk_name), PS3DISK_NAME, devidx+'a'); priv->blocking_factor = dev->blk_size >> 9; @@ -499,7 +498,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) gendisk->disk_name, priv->model, priv->raw_capacity >> 11, get_capacity(gendisk) >> 11); - add_disk(gendisk); + device_add_disk(&dev->sbd.core, gendisk); return 0; fail_cleanup_queue: diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 56847fcda086..456b4fe21559 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -773,14 +773,13 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) gendisk->fops = &ps3vram_fops; gendisk->queue = queue; gendisk->private_data = dev; - gendisk->driverfs_dev = &dev->core; strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name)); set_capacity(gendisk, priv->size >> 9); dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n", gendisk->disk_name, get_capacity(gendisk) >> 11); - add_disk(gendisk); + device_add_disk(&dev->core, gendisk); return 0; fail_cleanup_queue: diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 81666a56415e..450662055d97 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3286,9 +3286,9 @@ static void rbd_queue_workfn(struct work_struct *work) goto err; } - if (rq->cmd_flags & REQ_DISCARD) + if (req_op(rq) == REQ_OP_DISCARD) op_type = OBJ_OP_DISCARD; - else if (rq->cmd_flags & REQ_WRITE) + else if (req_op(rq) == REQ_OP_WRITE) op_type = OBJ_OP_WRITE; else op_type = OBJ_OP_READ; diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index e1b8b7061d2f..f81d70b39d10 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -230,8 +230,7 @@ int rsxx_attach_dev(struct rsxx_cardinfo *card) 
set_capacity(card->gendisk, card->size8 >> 9); else set_capacity(card->gendisk, 0); - add_disk(card->gendisk); - + device_add_disk(CARD_TO_DEV(card), card->gendisk); card->bdev_attached = 1; } @@ -308,7 +307,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name), "rsxx%d", card->disk_id); - card->gendisk->driverfs_dev = &card->dev->dev; card->gendisk->major = card->major; card->gendisk->first_minor = 0; card->gendisk->fops = &rsxx_fops; diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index cf8cd293abb5..5a20385f87d0 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -705,7 +705,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, dma_cnt[i] = 0; } - if (bio->bi_rw & REQ_DISCARD) { + if (bio_op(bio) == REQ_OP_DISCARD) { bv_len = bio->bi_iter.bi_size; while (bv_len > 0) { diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 910e065918af..3822eae102db 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -597,7 +597,7 @@ static void skd_request_fn(struct request_queue *q) data_dir = rq_data_dir(req); io_flags = req->cmd_flags; - if (io_flags & REQ_FLUSH) + if (req_op(req) == REQ_OP_FLUSH) flush++; if (io_flags & REQ_FUA) @@ -4690,10 +4690,10 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo) return -EIO; } -static int skd_bdev_attach(struct skd_device *skdev) +static int skd_bdev_attach(struct device *parent, struct skd_device *skdev) { pr_debug("%s:%s:%d add_disk\n", skdev->name, __func__, __LINE__); - add_disk(skdev->disk); + device_add_disk(parent, skdev->disk); return 0; } @@ -4812,8 +4812,6 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, skdev); - skdev->disk->driverfs_dev = &pdev->dev; - for (i = 0; i < SKD_MAX_BARS; i++) { skdev->mem_phys[i] = pci_resource_start(pdev, i); skdev->mem_size[i] = (u32)pci_resource_len(pdev, i); @@ -4851,7 +4849,7 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) (SKD_START_WAIT_SECONDS * HZ)); if (skdev->gendisk_on > 0) { /* device came on-line after reset */ - skd_bdev_attach(skdev); + skd_bdev_attach(&pdev->dev, skdev); rc = 0; } else { /* we timed out, something is wrong with the device, diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 4b911ed96ea3..cab157331c4e 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -804,7 +804,6 @@ static int probe_disk(struct vdc_port *port) g->fops = &vdc_fops; g->queue = q; g->private_data = port; - g->driverfs_dev = &port->vio.vdev->dev; set_capacity(g, port->vdisk_size); @@ -835,7 +834,7 @@ static int probe_disk(struct vdc_port *port) port->vdisk_size, (port->vdisk_size >> (20 - 9)), port->vio.ver.major, port->vio.ver.minor); - add_disk(g); + device_add_disk(&port->vio.vdev->dev, g); return 0; } diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 7939b9f87441..d0a3e6d4515f 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -344,7 +344,6 @@ static int add_bio(struct cardinfo *card) int offset; struct bio *bio; struct bio_vec vec; - int rw; bio = card->currentbio; if (!bio && card->bio) { @@ -359,7 +358,6 @@ static int add_bio(struct cardinfo *card) if (!bio) return 0; - rw = bio_rw(bio); if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE) return 0; @@ -369,7 +367,7 @@ static int add_bio(struct cardinfo *card) vec.bv_page, vec.bv_offset, vec.bv_len, - (rw == READ) ? + bio_op(bio) == REQ_OP_READ ? 
PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); p = &card->mm_pages[card->Ready]; @@ -398,7 +396,7 @@ static int add_bio(struct cardinfo *card) DMASCR_CHAIN_EN | DMASCR_SEM_EN | pci_cmds); - if (rw == WRITE) + if (bio_op(bio) == REQ_OP_WRITE) desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ); desc->sem_control_bits = desc->control_bits; @@ -462,7 +460,7 @@ static void process_page(unsigned long data) le32_to_cpu(desc->local_addr)>>9, le32_to_cpu(desc->transfer_size)); dump_dmastat(card, control); - } else if ((bio->bi_rw & REQ_WRITE) && + } else if (op_is_write(bio_op(bio)) && le32_to_cpu(desc->local_addr) >> 9 == card->init_size) { card->init_size += le32_to_cpu(desc->transfer_size) >> 9; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 42758b52768c..1523e05c46fc 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -172,7 +172,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); vbr->req = req; - if (req->cmd_flags & REQ_FLUSH) { + if (req_op(req) == REQ_OP_FLUSH) { vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_FLUSH); vbr->out_hdr.sector = 0; vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); @@ -236,25 +236,22 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, static int virtblk_get_id(struct gendisk *disk, char *id_str) { struct virtio_blk *vblk = disk->private_data; + struct request_queue *q = vblk->disk->queue; struct request *req; - struct bio *bio; int err; - bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES, - GFP_KERNEL); - if (IS_ERR(bio)) - return PTR_ERR(bio); - - req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL); - if (IS_ERR(req)) { - bio_put(bio); + req = blk_get_request(q, READ, GFP_KERNEL); + if (IS_ERR(req)) return PTR_ERR(req); - } - req->cmd_type = REQ_TYPE_DRV_PRIV; + + err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); + if (err) + goto out; + err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); +out: blk_put_request(req); - return err; } @@ -656,7 +653,6 @@ static int virtblk_probe(struct virtio_device *vdev) vblk->disk->first_minor = index_to_minor(index); vblk->disk->private_data = vblk; vblk->disk->fops = &virtblk_fops; - vblk->disk->driverfs_dev = &vdev->dev; vblk->disk->flags |= GENHD_FL_EXT_DEVT; vblk->index = index; @@ -733,7 +729,7 @@ static int virtblk_probe(struct virtio_device *vdev) virtio_device_ready(vdev); - add_disk(vblk->disk); + device_add_disk(&vdev->dev, vblk->disk); err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); if (err) goto out_del_disk; diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 4809c1501d7e..4a80ee752597 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -501,7 +501,7 @@ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, struct xen_vbd *vbd = &blkif->vbd; int rc = -EACCES; - if ((operation != READ) && vbd->readonly) + if ((operation != REQ_OP_READ) && vbd->readonly) goto out; if (likely(req->nr_sects)) { @@ -1014,7 +1014,7 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring, preq.sector_number = req->u.discard.sector_number; preq.nr_sects = req->u.discard.nr_sectors; - err = xen_vbd_translate(&preq, blkif, WRITE); + err = xen_vbd_translate(&preq, blkif, REQ_OP_WRITE); if (err) { pr_warn("access denied: DISCARD [%llu->%llu] on dev=%04x\n", preq.sector_number, @@ -1229,6 +1229,7 @@ static 
int dispatch_rw_block_io(struct xen_blkif_ring *ring, struct bio **biolist = pending_req->biolist; int i, nbio = 0; int operation; + int operation_flags = 0; struct blk_plug plug; bool drain = false; struct grant_page **pages = pending_req->segments; @@ -1247,17 +1248,19 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, switch (req_operation) { case BLKIF_OP_READ: ring->st_rd_req++; - operation = READ; + operation = REQ_OP_READ; break; case BLKIF_OP_WRITE: ring->st_wr_req++; - operation = WRITE_ODIRECT; + operation = REQ_OP_WRITE; + operation_flags = WRITE_ODIRECT; break; case BLKIF_OP_WRITE_BARRIER: drain = true; case BLKIF_OP_FLUSH_DISKCACHE: ring->st_f_req++; - operation = WRITE_FLUSH; + operation = REQ_OP_WRITE; + operation_flags = WRITE_FLUSH; break; default: operation = 0; /* make gcc happy */ @@ -1269,7 +1272,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, nseg = req->operation == BLKIF_OP_INDIRECT ? req->u.indirect.nr_segments : req->u.rw.nr_segments; - if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || + if (unlikely(nseg == 0 && operation_flags != WRITE_FLUSH) || unlikely((req->operation != BLKIF_OP_INDIRECT) && (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) || unlikely((req->operation == BLKIF_OP_INDIRECT) && @@ -1310,7 +1313,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, if (xen_vbd_translate(&preq, ring->blkif, operation) != 0) { pr_debug("access denied: %s of [%llu,%llu] on dev=%04x\n", - operation == READ ? "read" : "write", + operation == REQ_OP_READ ? "read" : "write", preq.sector_number, preq.sector_number + preq.nr_sects, ring->blkif->vbd.pdevice); @@ -1369,6 +1372,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; bio->bi_iter.bi_sector = preq.sector_number; + bio_set_op_attrs(bio, operation, operation_flags); } preq.sector_number += seg[i].nsec; @@ -1376,7 +1380,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, /* This will be hit if the operation was a flush or discard. */ if (!bio) { - BUG_ON(operation != WRITE_FLUSH); + BUG_ON(operation_flags != WRITE_FLUSH); bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) @@ -1386,20 +1390,21 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, bio->bi_bdev = preq.bdev; bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; + bio_set_op_attrs(bio, operation, operation_flags); } atomic_set(&pending_req->pendcnt, nbio); blk_start_plug(&plug); for (i = 0; i < nbio; i++) - submit_bio(operation, biolist[i]); + submit_bio(biolist[i]); /* Let the I/Os go.. 
*/ blk_finish_plug(&plug); - if (operation == READ) + if (operation == REQ_OP_READ) ring->st_rd_sect += preq.nr_sects; - else if (operation & WRITE) + else if (operation == REQ_OP_WRITE) ring->st_wr_sect += preq.nr_sects; return 0; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 3355f1cdd4e5..3cc6d1d86f1e 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -379,7 +379,7 @@ static struct attribute *xen_vbdstat_attrs[] = { NULL }; -static struct attribute_group xen_vbdstat_group = { +static const struct attribute_group xen_vbdstat_group = { .name = "statistics", .attrs = xen_vbdstat_attrs, }; @@ -480,7 +480,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags)) vbd->flush_support = true; - if (q && blk_queue_secdiscard(q)) + if (q && blk_queue_secure_erase(q)) vbd->discard_secure = true; pr_debug("Successful creation of handle=%04x (dom=%u)\n", @@ -715,8 +715,11 @@ static void backend_changed(struct xenbus_watch *watch, /* Front end dir is a number, which is used as the handle. */ err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle); - if (err) + if (err) { + kfree(be->mode); + be->mode = NULL; return; + } be->major = major; be->minor = minor; @@ -1022,9 +1025,9 @@ static int connect_ring(struct backend_info *be) pr_debug("%s %s\n", __func__, dev->otherend); be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT; - err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", - "%63s", protocol, NULL); - if (err) + err = xenbus_scanf(XBT_NIL, dev->otherend, "protocol", + "%63s", protocol); + if (err <= 0) strcpy(protocol, "unspecified, assuming default"); else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; @@ -1036,10 +1039,9 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -ENOSYS; } - err = xenbus_gather(XBT_NIL, dev->otherend, - "feature-persistent", "%u", - &pers_grants, NULL); - if (err) + err = xenbus_scanf(XBT_NIL, dev->otherend, + "feature-persistent", "%u", &pers_grants); + if (err <= 0) pers_grants = 0; be->blkif->vbd.feature_gnt_persistent = pers_grants; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2e6d1e9c3345..be4fea6a5dd3 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -196,6 +196,7 @@ struct blkfront_info unsigned int nr_ring_pages; struct request_queue *rq; unsigned int feature_flush; + unsigned int feature_fua; unsigned int feature_discard:1; unsigned int feature_secdiscard:1; unsigned int discard_granularity; @@ -207,6 +208,9 @@ struct blkfront_info struct blk_mq_tag_set tag_set; struct blkfront_ring_info *rinfo; unsigned int nr_rings; + /* Save incomplete reqs and bios for migration.
*/ + struct list_head requests; + struct bio_list bio_list; }; static unsigned int nr_minors; @@ -544,7 +548,7 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf ring_req->u.discard.nr_sectors = blk_rq_sectors(req); ring_req->u.discard.id = id; ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req); - if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) + if (req_op(req) == REQ_OP_SECURE_ERASE && info->feature_secdiscard) ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; else ring_req->u.discard.flag = 0; @@ -743,7 +747,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri * The indirect operation can only be a BLKIF_OP_READ or * BLKIF_OP_WRITE */ - BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA)); + BUG_ON(req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA); ring_req->operation = BLKIF_OP_INDIRECT; ring_req->u.indirect.indirect_op = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; @@ -755,7 +759,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri ring_req->u.rw.handle = info->handle; ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; - if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { + if (req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA) { /* * Ideally we can do an unordered flush-to-disk. * In case the backend only supports barriers, use that. * implement it the same way. (It's also a FLUSH+FUA, * since it is guaranteed ordered WRT previous writes.) */ - switch (info->feature_flush & - ((REQ_FLUSH|REQ_FUA))) { - case REQ_FLUSH|REQ_FUA: + if (info->feature_flush && info->feature_fua) ring_req->operation = BLKIF_OP_WRITE_BARRIER; - break; - case REQ_FLUSH: + else if (info->feature_flush) ring_req->operation = BLKIF_OP_FLUSH_DISKCACHE; - break; - default: + else ring_req->operation = 0; - } } ring_req->u.rw.nr_segments = num_grant; if (unlikely(require_extra_req)) { @@ -844,7 +843,8 @@ static int blkif_queue_request(struct request *req, struct blkfront_ring_info *r if (unlikely(rinfo->dev_info->connected != BLKIF_STATE_CONNECTED)) return 1; - if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) + if (unlikely(req_op(req) == REQ_OP_DISCARD || + req_op(req) == REQ_OP_SECURE_ERASE)) return blkif_queue_discard_req(req, rinfo); else return blkif_queue_rw_req(req, rinfo); @@ -864,10 +864,10 @@ static inline bool blkif_request_flush_invalid(struct request *req, struct blkfront_info *info) { return ((req->cmd_type != REQ_TYPE_FS) || - ((req->cmd_flags & REQ_FLUSH) && - !(info->feature_flush & REQ_FLUSH)) || + ((req_op(req) == REQ_OP_FLUSH) && + !info->feature_flush) || ((req->cmd_flags & REQ_FUA) && - !(info->feature_flush & REQ_FUA))); + !info->feature_fua)); } static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx, @@ -952,7 +952,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, rq->limits.discard_granularity = info->discard_granularity; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq); + queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq); } /* Hard sector size and max sectors impersonate the equiv. hardware.
*/ @@ -978,24 +978,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, return 0; } -static const char *flush_info(unsigned int feature_flush) +static const char *flush_info(struct blkfront_info *info) { - switch (feature_flush & ((REQ_FLUSH | REQ_FUA))) { - case REQ_FLUSH|REQ_FUA: + if (info->feature_flush && info->feature_fua) return "barrier: enabled;"; - case REQ_FLUSH: + else if (info->feature_flush) return "flush diskcache: enabled;"; - default: + else return "barrier or flush: disabled;"; - } } static void xlvbd_flush(struct blkfront_info *info) { - blk_queue_write_cache(info->rq, info->feature_flush & REQ_FLUSH, - info->feature_flush & REQ_FUA); + blk_queue_write_cache(info->rq, info->feature_flush ? true : false, + info->feature_fua ? true : false); pr_info("blkfront: %s: %s %s %s %s %s\n", - info->gd->disk_name, flush_info(info->feature_flush), + info->gd->disk_name, flush_info(info), "persistent grants:", info->feature_persistent ? "enabled;" : "disabled;", "indirect descriptors:", info->max_indirect_segments ? "enabled;" : "disabled;"); @@ -1136,7 +1134,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, gd->first_minor = minor; gd->fops = &xlvbd_block_fops; gd->private_data = info; - gd->driverfs_dev = &(info->xbdev->dev); set_capacity(gd, capacity); if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size, @@ -1594,7 +1591,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) info->feature_discard = 0; info->feature_secdiscard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); - queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq); + queue_flag_clear(QUEUE_FLAG_SECERASE, rq); } blk_mq_complete_request(req, error); break; @@ -1614,6 +1611,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) if (unlikely(error)) { if (error == -EOPNOTSUPP) error = 0; + info->feature_fua = 0; info->feature_flush = 0; xlvbd_flush(info); } @@ -2002,69 +2000,22 @@ static int blkif_recover(struct blkfront_info *info) { unsigned int i, r_index; struct request *req, *n; - struct blk_shadow *copy; int rc; struct bio *bio, *cloned_bio; - struct bio_list bio_list, merge_bio; unsigned int segs, offset; int pending, size; struct split_bio *split_bio; - struct list_head requests; blkfront_gather_backend_features(info); segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; blk_queue_max_segments(info->rq, segs); - bio_list_init(&bio_list); - INIT_LIST_HEAD(&requests); for (r_index = 0; r_index < info->nr_rings; r_index++) { - struct blkfront_ring_info *rinfo; - - rinfo = &info->rinfo[r_index]; - /* Stage 1: Make a safe copy of the shadow state. */ - copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow), - GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); - if (!copy) - return -ENOMEM; - - /* Stage 2: Set up free list. */ - memset(&rinfo->shadow, 0, sizeof(rinfo->shadow)); - for (i = 0; i < BLK_RING_SIZE(info); i++) - rinfo->shadow[i].req.u.rw.id = i+1; - rinfo->shadow_free = rinfo->ring.req_prod_pvt; - rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; + struct blkfront_ring_info *rinfo = &info->rinfo[r_index]; rc = blkfront_setup_indirect(rinfo); - if (rc) { - kfree(copy); + if (rc) return rc; - } - - for (i = 0; i < BLK_RING_SIZE(info); i++) { - /* Not in use? */ - if (!copy[i].request) - continue; - - /* - * Get the bios in the request so we can re-queue them. 
- */ - if (copy[i].request->cmd_flags & - (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { - /* - * Flush operations don't contain bios, so - * we need to requeue the whole request - */ - list_add(&copy[i].request->queuelist, &requests); - continue; - } - merge_bio.head = copy[i].request->bio; - merge_bio.tail = copy[i].request->biotail; - bio_list_merge(&bio_list, &merge_bio); - copy[i].request->bio = NULL; - blk_end_request_all(copy[i].request, 0); - } - - kfree(copy); } xenbus_switch_state(info->xbdev, XenbusStateConnected); @@ -2079,7 +2030,7 @@ static int blkif_recover(struct blkfront_info *info) kick_pending_request_queues(rinfo); } - list_for_each_entry_safe(req, n, &requests, queuelist) { + list_for_each_entry_safe(req, n, &info->requests, queuelist) { /* Requeue pending requests (flush or discard) */ list_del_init(&req->queuelist); BUG_ON(req->nr_phys_segments > segs); @@ -2087,7 +2038,7 @@ static int blkif_recover(struct blkfront_info *info) } blk_mq_kick_requeue_list(info->rq); - while ((bio = bio_list_pop(&bio_list)) != NULL) { + while ((bio = bio_list_pop(&info->bio_list)) != NULL) { /* Traverse the list of pending bios and re-queue them */ if (bio_segments(bio) > segs) { /* @@ -2108,7 +2059,7 @@ static int blkif_recover(struct blkfront_info *info) bio_trim(cloned_bio, offset, size); cloned_bio->bi_private = split_bio; cloned_bio->bi_end_io = split_bio_end; - submit_bio(cloned_bio->bi_rw, cloned_bio); + submit_bio(cloned_bio); } /* * Now we have to wait for all those smaller bios to @@ -2117,7 +2068,7 @@ static int blkif_recover(struct blkfront_info *info) continue; } /* We don't need to split this bio */ - submit_bio(bio->bi_rw, bio); + submit_bio(bio); } return 0; @@ -2133,9 +2084,47 @@ static int blkfront_resume(struct xenbus_device *dev) { struct blkfront_info *info = dev_get_drvdata(&dev->dev); int err = 0; + unsigned int i, j; dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); + bio_list_init(&info->bio_list); + INIT_LIST_HEAD(&info->requests); + for (i = 0; i < info->nr_rings; i++) { + struct blkfront_ring_info *rinfo = &info->rinfo[i]; + struct bio_list merge_bio; + struct blk_shadow *shadow = rinfo->shadow; + + for (j = 0; j < BLK_RING_SIZE(info); j++) { + /* Not in use? */ + if (!shadow[j].request) + continue; + + /* + * Get the bios in the request so we can re-queue them. + */ + if (req_op(shadow[j].request) == REQ_OP_FLUSH || + req_op(shadow[j].request) == REQ_OP_DISCARD || + req_op(shadow[j].request) == REQ_OP_SECURE_ERASE || + shadow[j].request->cmd_flags & REQ_FUA) { + /* + * Flush operations don't contain bios, so + * we need to requeue the whole request + * + * XXX: but this doesn't make any sense for a + * write with the FUA flag set..
+ */ + list_add(&shadow[j].request->queuelist, &info->requests); + continue; + } + merge_bio.head = shadow[j].request->bio; + merge_bio.tail = shadow[j].request->biotail; + bio_list_merge(&info->bio_list, &merge_bio); + shadow[j].request->bio = NULL; + blk_mq_end_request(shadow[j].request, 0); + } + } + blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); err = negotiate_mq(info); @@ -2208,10 +2197,9 @@ static void blkfront_setup_discard(struct blkfront_info *info) info->discard_granularity = discard_granularity; info->discard_alignment = discard_alignment; } - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "discard-secure", "%d", &discard_secure, - NULL); - if (!err) + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "discard-secure", "%u", &discard_secure); + if (err > 0) info->feature_secdiscard = !!discard_secure; } @@ -2309,10 +2297,10 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) unsigned int indirect_segments; info->feature_flush = 0; + info->feature_fua = 0; - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-barrier", "%d", &barrier, - NULL); + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-barrier", "%d", &barrier); /* * If there's no "feature-barrier" defined, then it means @@ -2321,38 +2309,40 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) * * If there are barriers, then we use flush. */ - if (!err && barrier) - info->feature_flush = REQ_FLUSH | REQ_FUA; + if (err > 0 && barrier) { + info->feature_flush = 1; + info->feature_fua = 1; + } + /* * And if there is "feature-flush-cache" use that above * barriers. */ - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-flush-cache", "%d", &flush, - NULL); + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-flush-cache", "%d", &flush); - if (!err && flush) - info->feature_flush = REQ_FLUSH; + if (err > 0 && flush) { + info->feature_flush = 1; + info->feature_fua = 0; + } - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-discard", "%d", &discard, - NULL); + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-discard", "%d", &discard); - if (!err && discard) + if (err > 0 && discard) blkfront_setup_discard(info); - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-persistent", "%u", &persistent, - NULL); - if (err) + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-persistent", "%d", &persistent); + if (err <= 0) info->feature_persistent = 0; else info->feature_persistent = persistent; - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-max-indirect-segments", "%u", &indirect_segments, - NULL); - if (err) + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-max-indirect-segments", "%u", + &indirect_segments); + if (err <= 0) info->max_indirect_segments = 0; else info->max_indirect_segments = min(indirect_segments, @@ -2452,7 +2442,7 @@ static void blkfront_connect(struct blkfront_info *info) for (i = 0; i < info->nr_rings; i++) kick_pending_request_queues(&info->rinfo[i]); - add_disk(info->gd); + device_add_disk(&info->xbdev->dev, info->gd); info->is_ready = 1; } diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 386ba3d1a6ee..b8ecba6dcd3b 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -1,8 +1,7 @@ config ZRAM tristate "Compressed RAM block device support" - depends on BLOCK && SYSFS && ZSMALLOC - select LZO_COMPRESS - select LZO_DECOMPRESS + depends on BLOCK && SYSFS && ZSMALLOC && 
CRYPTO + select CRYPTO_LZO default n help Creates virtual block devices called /dev/zramX (X = 0, 1, ...). @@ -14,13 +13,3 @@ config ZRAM disks and maybe many more. See zram.txt for more information. - -config ZRAM_LZ4_COMPRESS - bool "Enable LZ4 algorithm support" - depends on ZRAM - select LZ4_COMPRESS - select LZ4_DECOMPRESS - default n - help - This option enables LZ4 compression algorithm support. Compression - algorithm can be changed using `comp_algorithm' device attribute. \ No newline at end of file diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile index be0763ff57a2..9e2b79e9a990 100644 --- a/drivers/block/zram/Makefile +++ b/drivers/block/zram/Makefile @@ -1,5 +1,3 @@ -zram-y := zcomp_lzo.o zcomp.o zram_drv.o - -zram-$(CONFIG_ZRAM_LZ4_COMPRESS) += zcomp_lz4.o +zram-y := zcomp.o zram_drv.o obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index b51a816d766b..4b5cd3a7b2b6 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -14,108 +14,150 @@ #include #include #include +#include #include "zcomp.h" -#include "zcomp_lzo.h" -#ifdef CONFIG_ZRAM_LZ4_COMPRESS -#include "zcomp_lz4.h" -#endif -static struct zcomp_backend *backends[] = { - &zcomp_lzo, -#ifdef CONFIG_ZRAM_LZ4_COMPRESS - &zcomp_lz4, +static const char * const backends[] = { + "lzo", +#if IS_ENABLED(CONFIG_CRYPTO_LZ4) + "lz4", +#endif +#if IS_ENABLED(CONFIG_CRYPTO_DEFLATE) + "deflate", +#endif +#if IS_ENABLED(CONFIG_CRYPTO_LZ4HC) + "lz4hc", +#endif +#if IS_ENABLED(CONFIG_CRYPTO_842) + "842", #endif NULL }; -static struct zcomp_backend *find_backend(const char *compress) -{ - int i = 0; - while (backends[i]) { - if (sysfs_streq(compress, backends[i]->name)) - break; - i++; - } - return backends[i]; -} - -static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm) +static void zcomp_strm_free(struct zcomp_strm *zstrm) { - if (zstrm->private) - comp->backend->destroy(zstrm->private); + if (!IS_ERR_OR_NULL(zstrm->tfm)) + crypto_free_comp(zstrm->tfm); free_pages((unsigned long)zstrm->buffer, 1); kfree(zstrm); } /* - * allocate new zcomp_strm structure with ->private initialized by + * allocate new zcomp_strm structure with ->tfm initialized by * backend, return NULL on error */ -static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp, gfp_t flags) +static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) { - struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), flags); + struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL); if (!zstrm) return NULL; - zstrm->private = comp->backend->create(flags); + zstrm->tfm = crypto_alloc_comp(comp->name, 0, 0); /* * allocate 2 pages. 1 for compressed data, plus 1 extra for the * case when compressed size is larger than the original one */ - zstrm->buffer = (void *)__get_free_pages(flags | __GFP_ZERO, 1); - if (!zstrm->private || !zstrm->buffer) { - zcomp_strm_free(comp, zstrm); + zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->buffer) { + zcomp_strm_free(zstrm); zstrm = NULL; } return zstrm; } +bool zcomp_available_algorithm(const char *comp) +{ + int i = 0; + + while (backends[i]) { + if (sysfs_streq(comp, backends[i])) + return true; + i++; + } + + /* + * Crypto does not ignore a trailing new line symbol, + * so make sure you don't supply a string containing + * one. + * This also means that we permit zcomp initialisation + * with any compressing algorithm known to crypto api. 
+ */ + return crypto_has_comp(comp, 0, 0) == 1; +} + /* show available compressors */ ssize_t zcomp_available_show(const char *comp, char *buf) { + bool known_algorithm = false; ssize_t sz = 0; int i = 0; - while (backends[i]) { - if (!strcmp(comp, backends[i]->name)) + for (; backends[i]; i++) { + if (!strcmp(comp, backends[i])) { + known_algorithm = true; sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, - "[%s] ", backends[i]->name); - else + "[%s] ", backends[i]); + } else { sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, - "%s ", backends[i]->name); - i++; + "%s ", backends[i]); + } } + + /* + * Out-of-tree module known to crypto api or a missing + * entry in `backends'. + */ + if (!known_algorithm && crypto_has_comp(comp, 0, 0) == 1) + sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, + "[%s] ", comp); + sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n"); return sz; } -bool zcomp_available_algorithm(const char *comp) -{ - return find_backend(comp) != NULL; -} - -struct zcomp_strm *zcomp_strm_find(struct zcomp *comp) +struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) { return *get_cpu_ptr(comp->stream); } -void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm) +void zcomp_stream_put(struct zcomp *comp) { put_cpu_ptr(comp->stream); } -int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, - const unsigned char *src, size_t *dst_len) +int zcomp_compress(struct zcomp_strm *zstrm, + const void *src, unsigned int *dst_len) { - return comp->backend->compress(src, zstrm->buffer, dst_len, - zstrm->private); + /* + * Our dst memory (zstrm->buffer) is always `2 * PAGE_SIZE' sized + * because sometimes we can end up having bigger compressed data + * due to various reasons: for example compression algorithms tend + * to add some padding to the compressed buffer. Speaking of padding, + * comp algorithm `842' pads the compressed length to multiple of 8 + * and returns -ENOSPC when the dst memory is not big enough, which + * is not something that ZRAM wants to see. We can handle the + * `compressed_size > PAGE_SIZE' case easily in ZRAM, but when we + * receive -ERRNO from the compressing backend we can't help it + * anymore. To make `842' happy we need to tell the exact size of + * the dst buffer, zram_drv will take care of the fact that + * compressed buffer is too big.
+ */ + *dst_len = PAGE_SIZE * 2; + + return crypto_comp_compress(zstrm->tfm, + src, PAGE_SIZE, + zstrm->buffer, dst_len); } -int zcomp_decompress(struct zcomp *comp, const unsigned char *src, - size_t src_len, unsigned char *dst) +int zcomp_decompress(struct zcomp_strm *zstrm, + const void *src, unsigned int src_len, void *dst) { - return comp->backend->decompress(src, src_len, dst); + unsigned int dst_len = PAGE_SIZE; + + return crypto_comp_decompress(zstrm->tfm, + src, src_len, + dst, &dst_len); } static int __zcomp_cpu_notifier(struct zcomp *comp, @@ -127,7 +169,7 @@ static int __zcomp_cpu_notifier(struct zcomp *comp, case CPU_UP_PREPARE: if (WARN_ON(*per_cpu_ptr(comp->stream, cpu))) break; - zstrm = zcomp_strm_alloc(comp, GFP_KERNEL); + zstrm = zcomp_strm_alloc(comp); if (IS_ERR_OR_NULL(zstrm)) { pr_err("Can't allocate a compression stream\n"); return NOTIFY_BAD; @@ -138,7 +180,7 @@ static int __zcomp_cpu_notifier(struct zcomp *comp, case CPU_UP_CANCELED: zstrm = *per_cpu_ptr(comp->stream, cpu); if (!IS_ERR_OR_NULL(zstrm)) - zcomp_strm_free(comp, zstrm); + zcomp_strm_free(zstrm); *per_cpu_ptr(comp->stream, cpu) = NULL; break; default: @@ -209,18 +251,16 @@ void zcomp_destroy(struct zcomp *comp) struct zcomp *zcomp_create(const char *compress) { struct zcomp *comp; - struct zcomp_backend *backend; int error; - backend = find_backend(compress); - if (!backend) + if (!zcomp_available_algorithm(compress)) return ERR_PTR(-EINVAL); comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL); if (!comp) return ERR_PTR(-ENOMEM); - comp->backend = backend; + comp->name = compress; error = zcomp_init(comp); if (error) { kfree(comp); diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index ffd88cb747fe..478cac2ed465 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -13,33 +13,15 @@ struct zcomp_strm { /* compression/decompression buffer */ void *buffer; - /* - * The private data of the compression stream, only compression - * stream backend can touch this (e.g. 
compression algorithm - * working memory) - */ - void *private; -}; - -/* static compression backend */ -struct zcomp_backend { - int (*compress)(const unsigned char *src, unsigned char *dst, - size_t *dst_len, void *private); - - int (*decompress)(const unsigned char *src, size_t src_len, - unsigned char *dst); - - void *(*create)(gfp_t flags); - void (*destroy)(void *private); - - const char *name; + struct crypto_comp *tfm; }; /* dynamic per-device compression frontend */ struct zcomp { struct zcomp_strm * __percpu *stream; - struct zcomp_backend *backend; struct notifier_block notifier; + + const char *name; }; ssize_t zcomp_available_show(const char *comp, char *buf); @@ -48,14 +30,14 @@ bool zcomp_available_algorithm(const char *comp); struct zcomp *zcomp_create(const char *comp); void zcomp_destroy(struct zcomp *comp); -struct zcomp_strm *zcomp_strm_find(struct zcomp *comp); -void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm); +struct zcomp_strm *zcomp_stream_get(struct zcomp *comp); +void zcomp_stream_put(struct zcomp *comp); -int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, - const unsigned char *src, size_t *dst_len); +int zcomp_compress(struct zcomp_strm *zstrm, + const void *src, unsigned int *dst_len); -int zcomp_decompress(struct zcomp *comp, const unsigned char *src, - size_t src_len, unsigned char *dst); +int zcomp_decompress(struct zcomp_strm *zstrm, + const void *src, unsigned int src_len, void *dst); bool zcomp_set_max_streams(struct zcomp *comp, int num_strm); #endif /* _ZCOMP_H_ */ diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c deleted file mode 100644 index 0110086accba..000000000000 --- a/drivers/block/zram/zcomp_lz4.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2014 Sergey Senozhatsky. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include - -#include "zcomp_lz4.h" - -static void *zcomp_lz4_create(gfp_t flags) -{ - void *ret; - - ret = kmalloc(LZ4_MEM_COMPRESS, flags); - if (!ret) - ret = __vmalloc(LZ4_MEM_COMPRESS, - flags | __GFP_HIGHMEM, - PAGE_KERNEL); - return ret; -} - -static void zcomp_lz4_destroy(void *private) -{ - kvfree(private); -} - -static int zcomp_lz4_compress(const unsigned char *src, unsigned char *dst, - size_t *dst_len, void *private) -{ - /* return : Success if return 0 */ - return lz4_compress(src, PAGE_SIZE, dst, dst_len, private); -} - -static int zcomp_lz4_decompress(const unsigned char *src, size_t src_len, - unsigned char *dst) -{ - size_t dst_len = PAGE_SIZE; - /* return : Success if return 0 */ - return lz4_decompress_unknownoutputsize(src, src_len, dst, &dst_len); -} - -struct zcomp_backend zcomp_lz4 = { - .compress = zcomp_lz4_compress, - .decompress = zcomp_lz4_decompress, - .create = zcomp_lz4_create, - .destroy = zcomp_lz4_destroy, - .name = "lz4", -}; diff --git a/drivers/block/zram/zcomp_lz4.h b/drivers/block/zram/zcomp_lz4.h deleted file mode 100644 index 60613fb29dd8..000000000000 --- a/drivers/block/zram/zcomp_lz4.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (C) 2014 Sergey Senozhatsky. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _ZCOMP_LZ4_H_ -#define _ZCOMP_LZ4_H_ - -#include "zcomp.h" - -extern struct zcomp_backend zcomp_lz4; - -#endif /* _ZCOMP_LZ4_H_ */ diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c deleted file mode 100644 index ed7a1f0549ec..000000000000 --- a/drivers/block/zram/zcomp_lzo.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2014 Sergey Senozhatsky. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include - -#include "zcomp_lzo.h" - -static void *lzo_create(gfp_t flags) -{ - void *ret; - - ret = kmalloc(LZO1X_MEM_COMPRESS, flags); - if (!ret) - ret = __vmalloc(LZO1X_MEM_COMPRESS, - flags | __GFP_HIGHMEM, - PAGE_KERNEL); - return ret; -} - -static void lzo_destroy(void *private) -{ - kvfree(private); -} - -static int lzo_compress(const unsigned char *src, unsigned char *dst, - size_t *dst_len, void *private) -{ - int ret = lzo1x_1_compress(src, PAGE_SIZE, dst, dst_len, private); - return ret == LZO_E_OK ? 0 : ret; -} - -static int lzo_decompress(const unsigned char *src, size_t src_len, - unsigned char *dst) -{ - size_t dst_len = PAGE_SIZE; - int ret = lzo1x_decompress_safe(src, src_len, dst, &dst_len); - return ret == LZO_E_OK ? 0 : ret; -} - -struct zcomp_backend zcomp_lzo = { - .compress = lzo_compress, - .decompress = lzo_decompress, - .create = lzo_create, - .destroy = lzo_destroy, - .name = "lzo", -}; diff --git a/drivers/block/zram/zcomp_lzo.h b/drivers/block/zram/zcomp_lzo.h deleted file mode 100644 index 128c5807fa14..000000000000 --- a/drivers/block/zram/zcomp_lzo.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (C) 2014 Sergey Senozhatsky. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef _ZCOMP_LZO_H_ -#define _ZCOMP_LZO_H_ - -#include "zcomp.h" - -extern struct zcomp_backend zcomp_lzo; - -#endif /* _ZCOMP_LZO_H_ */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 8fcad8b761f1..7454cf188c8e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -342,9 +342,16 @@ static ssize_t comp_algorithm_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); + char compressor[CRYPTO_MAX_ALG_NAME]; size_t sz; - if (!zcomp_available_algorithm(buf)) + strlcpy(compressor, buf, sizeof(compressor)); + /* ignore trailing newline */ + sz = strlen(compressor); + if (sz > 0 && compressor[sz - 1] == '\n') + compressor[sz - 1] = 0x00; + + if (!zcomp_available_algorithm(compressor)) return -EINVAL; down_write(&zram->init_lock); @@ -353,13 +360,8 @@ static ssize_t comp_algorithm_store(struct device *dev, pr_info("Can't change algorithm for initialized device\n"); return -EBUSY; } - strlcpy(zram->compressor, buf, sizeof(zram->compressor)); - - /* ignore trailing newline */ - sz = strlen(zram->compressor); - if (sz > 0 && zram->compressor[sz - 1] == '\n') - zram->compressor[sz - 1] = 0x00; + strlcpy(zram->compressor, compressor, sizeof(compressor)); up_write(&zram->init_lock); return len; } @@ -563,7 +565,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) unsigned char *cmem; struct zram_meta *meta = zram->meta; unsigned long handle; - size_t size; + unsigned int size; bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); handle = meta->table[index].handle; @@ -576,10 +578,14 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) } cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); - if (size == PAGE_SIZE) + if (size == PAGE_SIZE) { copy_page(mem, cmem); - else - ret = zcomp_decompress(zram->comp, cmem, size, mem); + } else { + struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); + + ret = zcomp_decompress(zstrm, cmem, size, mem); + zcomp_stream_put(zram->comp); + } zs_unmap_object(meta->mem_pool, handle); bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); @@ -646,7 +652,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, int offset) { int ret = 0; - size_t clen; + unsigned int clen; unsigned long handle = 0; struct page *page; unsigned char *user_mem, *cmem, *src, *uncmem = NULL; @@ -695,8 +701,8 @@ compress_again: goto out; } - zstrm = zcomp_strm_find(zram->comp); - ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen); + zstrm = zcomp_stream_get(zram->comp); + ret = zcomp_compress(zstrm, uncmem, &clen); if (!is_partial_io(bvec)) { kunmap_atomic(user_mem); user_mem = NULL; @@ -732,19 +738,21 @@ compress_again: handle = zs_malloc(meta->mem_pool, clen, __GFP_KSWAPD_RECLAIM | __GFP_NOWARN | - __GFP_HIGHMEM); + __GFP_HIGHMEM | + __GFP_MOVABLE); if (!handle) { - zcomp_strm_release(zram->comp, zstrm); + zcomp_stream_put(zram->comp); zstrm = NULL; atomic64_inc(&zram->stats.writestall); handle = zs_malloc(meta->mem_pool, clen, - GFP_NOIO | __GFP_HIGHMEM); + GFP_NOIO | __GFP_HIGHMEM | + __GFP_MOVABLE); if (handle) goto compress_again; - pr_err("Error allocating memory for compressed page: %u, size=%zu\n", + pr_err("Error allocating memory for compressed page: %u, size=%u\n", index, clen); ret = -ENOMEM; goto out; @@ -769,7 +777,7 @@ compress_again: memcpy(cmem, src, clen); } - zcomp_strm_release(zram->comp, zstrm); + zcomp_stream_put(zram->comp); zstrm = NULL; 
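To see how the reworked zcomp entry points fit together from a caller's side, here is a compact sketch (illustrative only; example_compress_page and its arguments are invented, but the zcomp_* calls are the ones introduced above):

static int example_compress_page(struct zram *zram, const void *page_mem,
				 unsigned int *clen)
{
	/* zcomp_stream_get() returns this CPU's stream via get_cpu_ptr(),
	 * which disables preemption, so no sleeping until the put below.
	 */
	struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
	int ret;

	/* zcomp_compress() sets *clen to 2 * PAGE_SIZE itself, then
	 * compresses one PAGE_SIZE chunk of page_mem into zstrm->buffer.
	 */
	ret = zcomp_compress(zstrm, page_mem, clen);
	zcomp_stream_put(zram->comp);
	return ret;
}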
zs_unmap_object(meta->mem_pool, handle); @@ -789,7 +797,7 @@ compress_again: atomic64_inc(&zram->stats.pages_stored); out: if (zstrm) - zcomp_strm_release(zram->comp, zstrm); + zcomp_stream_put(zram->comp); if (is_partial_io(bvec)) kfree(uncmem); return ret; @@ -874,7 +882,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; - if (unlikely(bio->bi_rw & REQ_DISCARD)) { + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { zram_bio_discard(zram, index, offset, bio); bio_endio(bio); return; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 3f5bf66a27e4..74fcf10da374 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -15,8 +15,9 @@ #ifndef _ZRAM_DRV_H_ #define _ZRAM_DRV_H_ -#include +#include #include +#include #include "zcomp.h" @@ -113,7 +114,7 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ - char compressor[10]; + char compressor[CRYPTO_MAX_ALG_NAME]; /* * zram is claimed so open request will be failed */ diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 25894687c168..fadba88745dc 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -123,6 +123,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x13d3, 0x3472) }, { USB_DEVICE(0x13d3, 0x3474) }, { USB_DEVICE(0x13d3, 0x3487) }, + { USB_DEVICE(0x13d3, 0x3490) }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xE02C) }, @@ -190,6 +191,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x13d3, 0x3472), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3487), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3490), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU22 with sflash firmware */ { USB_DEVICE(0x0489, 0xE036), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index fd6b53e9bbf2..a9932fe57d92 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -274,6 +274,8 @@ static int bpa10x_setup(struct hci_dev *hdev) BT_INFO("%s: %s", hdev->name, (char *)(skb->data + 1)); + hci_set_fw_info(hdev, "%s", skb->data + 1); + kfree_skb(skb); return 0; } diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c index 7ad8d61c0c61..e6a85f0e6309 100644 --- a/drivers/bluetooth/btmrvl_main.c +++ b/drivers/bluetooth/btmrvl_main.c @@ -138,7 +138,7 @@ int btmrvl_process_event(struct btmrvl_private *priv, struct sk_buff *skb) if (event->length > 3 && event->data[3]) priv->btmrvl_dev.dev_type = HCI_AMP; else - priv->btmrvl_dev.dev_type = HCI_BREDR; + priv->btmrvl_dev.dev_type = HCI_PRIMARY; BT_DBG("dev_type: %d", priv->btmrvl_dev.dev_type); } else if (priv->btmrvl_dev.sendcmdflag && diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c index f425ddf91a24..d02f2c14df32 100644 --- a/drivers/bluetooth/btmrvl_sdio.c +++ b/drivers/bluetooth/btmrvl_sdio.c @@ -1071,7 +1071,6 @@ static int btmrvl_sdio_host_to_card(struct btmrvl_private *priv, { struct btmrvl_sdio_card *card = priv->btmrvl_dev.card; int ret = 0; - int buf_block_len; int blksz; int i = 0; u8 *buf = NULL; @@ -1083,9 +1082,13 @@ static int btmrvl_sdio_host_to_card(struct btmrvl_private *priv, return -EINVAL; } + blksz = DIV_ROUND_UP(nb, SDIO_BLOCK_SIZE) * SDIO_BLOCK_SIZE; + buf = payload; - if ((unsigned long) payload & (BTSDIO_DMA_ALIGN - 1)) 
{ - tmpbufsz = ALIGN_SZ(nb, BTSDIO_DMA_ALIGN); + if ((unsigned long) payload & (BTSDIO_DMA_ALIGN - 1) || + nb < blksz) { + tmpbufsz = ALIGN_SZ(blksz, BTSDIO_DMA_ALIGN) + + BTSDIO_DMA_ALIGN; tmpbuf = kzalloc(tmpbufsz, GFP_KERNEL); if (!tmpbuf) return -ENOMEM; @@ -1093,15 +1096,12 @@ static int btmrvl_sdio_host_to_card(struct btmrvl_private *priv, memcpy(buf, payload, nb); } - blksz = SDIO_BLOCK_SIZE; - buf_block_len = DIV_ROUND_UP(nb, blksz); - sdio_claim_host(card->func); do { /* Transfer data to card */ ret = sdio_writesb(card->func, card->ioport, buf, - buf_block_len * blksz); + blksz); if (ret < 0) { i++; BT_ERR("i=%d writesb failed: %d", i, ret); @@ -1625,6 +1625,7 @@ static int btmrvl_sdio_suspend(struct device *dev) if (priv->adapter->hs_state != HS_ACTIVATED) { if (btmrvl_enable_hs(priv)) { BT_ERR("HS not activated, suspend failed!"); + priv->adapter->is_suspending = false; return -EBUSY; } } diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index 2b05661e3818..1cb958e199eb 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -311,7 +311,7 @@ static int btsdio_probe(struct sdio_func *func, if (id->class == SDIO_CLASS_BT_AMP) hdev->dev_type = HCI_AMP; else - hdev->dev_type = HCI_BREDR; + hdev->dev_type = HCI_PRIMARY; data->hdev = hdev; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a3be65e6231a..811f9b97e360 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -237,6 +237,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x13d3, 0x3472), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3487), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3490), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE }, @@ -249,6 +250,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME }, + { USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME }, /* Broadcom BCM2035 */ { USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 }, @@ -314,6 +316,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL }, { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL }, { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_NEW }, + { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL }, /* Other Intel Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x8087, 0xe0, 0x01, 0x01), @@ -2103,10 +2106,14 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) /* With this Intel bootloader only the hardware variant and device * revision information are used to select the right firmware. * - * Currently this bootloader support is limited to hardware variant - * iBT 3.0 (LnP/SfP) which is identified by the value 11 (0x0b). + * The firmware filename is ibt-<hw_variant>-<dev_revid>.sfi.
+ * + * Currently the supported hardware variants are: + * 11 (0x0b) for iBT3.0 (LnP/SfP) + * 12 (0x0c) for iBT3.5 (WsP) */ - snprintf(fwname, sizeof(fwname), "intel/ibt-11-%u.sfi", + snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.sfi", + le16_to_cpu(ver.hw_variant), le16_to_cpu(params->dev_revid)); err = request_firmware(&fw, fwname, &hdev->dev); @@ -2122,7 +2129,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) /* Save the DDC file name for later use to apply once the firmware * downloading is done. */ - snprintf(fwname, sizeof(fwname), "intel/ibt-11-%u.ddc", + snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.ddc", + le16_to_cpu(ver.hw_variant), le16_to_cpu(params->dev_revid)); kfree_skb(skb); @@ -2825,7 +2833,7 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_AMP) hdev->dev_type = HCI_AMP; else - hdev->dev_type = HCI_BREDR; + hdev->dev_type = HCI_PRIMARY; data->hdev = hdev; diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c index 24a652f9252b..485281b3f167 100644 --- a/drivers/bluetooth/btwilink.c +++ b/drivers/bluetooth/btwilink.c @@ -51,7 +51,7 @@ */ struct ti_st { struct hci_dev *hdev; - char reg_status; + int reg_status; long (*st_write) (struct sk_buff *); struct completion wait_reg_completion; }; @@ -83,7 +83,7 @@ static inline void ti_st_tx_complete(struct ti_st *hst, int pkt_type) * status.ti_st_open() function will wait for signal from this * API when st_register() function returns ST_PENDING. */ -static void st_reg_completion_cb(void *priv_data, char data) +static void st_reg_completion_cb(void *priv_data, int data) { struct ti_st *lhst = priv_data; diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index f6f2b01a1fea..ed0a4201b551 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -537,9 +537,7 @@ static int intel_setup(struct hci_uart *hu) { static const u8 reset_param[] = { 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x04, 0x00 }; - static const u8 lpm_param[] = { 0x03, 0x07, 0x01, 0x0b }; struct intel_data *intel = hu->priv; - struct intel_device *idev = NULL; struct hci_dev *hdev = hu->hdev; struct sk_buff *skb; struct intel_version ver; @@ -884,35 +882,23 @@ done: bt_dev_info(hdev, "Device booted in %llu usecs", duration); - /* Enable LPM if matching pdev with wakeup enabled */ + /* Enable LPM if matching pdev with wakeup enabled, set TX active + * until further LPM TX notification. 
+ */ mutex_lock(&intel_device_list_lock); list_for_each(p, &intel_device_list) { struct intel_device *dev = list_entry(p, struct intel_device, list); if (hu->tty->dev->parent == dev->pdev->dev.parent) { - if (device_may_wakeup(&dev->pdev->dev)) - idev = dev; + if (device_may_wakeup(&dev->pdev->dev)) { + set_bit(STATE_LPM_ENABLED, &intel->flags); + set_bit(STATE_TX_ACTIVE, &intel->flags); + } break; } } mutex_unlock(&intel_device_list_lock); - if (!idev) - goto no_lpm; - - bt_dev_info(hdev, "Enabling LPM"); - - skb = __hci_cmd_sync(hdev, 0xfc8b, sizeof(lpm_param), lpm_param, - HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) { - bt_dev_err(hdev, "Failed to enable LPM"); - goto no_lpm; - } - kfree_skb(skb); - - set_bit(STATE_LPM_ENABLED, &intel->flags); - -no_lpm: /* Ignore errors, device can work without DDC parameters */ btintel_load_ddc_config(hdev, fwname); diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 49b3e1e2d236..dda97398c59a 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -609,7 +609,7 @@ static int hci_uart_register_dev(struct hci_uart *hu) if (test_bit(HCI_UART_CREATE_AMP, &hu->hdev_flags)) hdev->dev_type = HCI_AMP; else - hdev->dev_type = HCI_BREDR; + hdev->dev_type = HCI_PRIMARY; if (test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) return 0; diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index aba31210c802..3ff229b2e7f3 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -97,10 +97,10 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) if (data->hdev) return -EBADFD; - /* bits 0-1 are dev_type (BR/EDR or AMP) */ + /* bits 0-1 are dev_type (Primary or AMP) */ dev_type = opcode & 0x03; - if (dev_type != HCI_BREDR && dev_type != HCI_AMP) + if (dev_type != HCI_PRIMARY && dev_type != HCI_AMP) return -EINVAL; /* bits 2-5 are reserved (must be zero) */ @@ -316,7 +316,7 @@ static void vhci_open_timeout(struct work_struct *work) struct vhci_data *data = container_of(work, struct vhci_data, open_timeout.work); - vhci_create_device(data, amp ? HCI_AMP : HCI_BREDR); + vhci_create_device(data, amp ? HCI_AMP : HCI_PRIMARY); } static int vhci_open(struct inode *inode, struct file *file) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 1b257ea9776a..5d475b3a0b2e 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2032,7 +2032,7 @@ static int cdrom_read_subchannel(struct cdrom_device_info *cdi, init_cdrom_command(&cgc, buffer, 16, CGC_DATA_READ); cgc.cmd[0] = GPCMD_READ_SUBCHANNEL; - cgc.cmd[1] = 2; /* MSF addressing */ + cgc.cmd[1] = subchnl->cdsc_format;/* MSF or LBA addressing */ cgc.cmd[2] = 0x40; /* request subQ data */ cgc.cmd[3] = mcn ? 
2 : 1; cgc.cmd[8] = 16; @@ -2041,17 +2041,27 @@ static int cdrom_read_subchannel(struct cdrom_device_info *cdi, return ret; subchnl->cdsc_audiostatus = cgc.buffer[1]; - subchnl->cdsc_format = CDROM_MSF; subchnl->cdsc_ctrl = cgc.buffer[5] & 0xf; subchnl->cdsc_trk = cgc.buffer[6]; subchnl->cdsc_ind = cgc.buffer[7]; - subchnl->cdsc_reladdr.msf.minute = cgc.buffer[13]; - subchnl->cdsc_reladdr.msf.second = cgc.buffer[14]; - subchnl->cdsc_reladdr.msf.frame = cgc.buffer[15]; - subchnl->cdsc_absaddr.msf.minute = cgc.buffer[9]; - subchnl->cdsc_absaddr.msf.second = cgc.buffer[10]; - subchnl->cdsc_absaddr.msf.frame = cgc.buffer[11]; + if (subchnl->cdsc_format == CDROM_LBA) { + subchnl->cdsc_absaddr.lba = ((cgc.buffer[8] << 24) | + (cgc.buffer[9] << 16) | + (cgc.buffer[10] << 8) | + (cgc.buffer[11])); + subchnl->cdsc_reladdr.lba = ((cgc.buffer[12] << 24) | + (cgc.buffer[13] << 16) | + (cgc.buffer[14] << 8) | + (cgc.buffer[15])); + } else { + subchnl->cdsc_reladdr.msf.minute = cgc.buffer[13]; + subchnl->cdsc_reladdr.msf.second = cgc.buffer[14]; + subchnl->cdsc_reladdr.msf.frame = cgc.buffer[15]; + subchnl->cdsc_absaddr.msf.minute = cgc.buffer[9]; + subchnl->cdsc_absaddr.msf.second = cgc.buffer[10]; + subchnl->cdsc_absaddr.msf.frame = cgc.buffer[11]; + } return 0; } @@ -3022,7 +3032,7 @@ static noinline int mmc_ioctl_cdrom_subchannel(struct cdrom_device_info *cdi, if (!((requested == CDROM_MSF) || (requested == CDROM_LBA))) return -EINVAL; - q.cdsc_format = CDROM_MSF; + ret = cdrom_read_subchannel(cdi, &q, 0); if (ret) return ret; diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c index 8bf70e8c3f79..50aa9ba91f25 100644 --- a/drivers/char/dsp56k.c +++ b/drivers/char/dsp56k.c @@ -325,7 +325,7 @@ static long dsp56k_ioctl(struct file *file, unsigned int cmd, if(get_user(bin, &binary->bin) < 0) return -EFAULT; - if (len == 0) { + if (len <= 0) { return -EINVAL; /* nothing to upload?!? */ } if (len > DSP56K_MAX_BINARY_LENGTH) { diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index ac51149e9777..56ad5a5936a9 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -90,7 +90,7 @@ config HW_RANDOM_BCM63XX config HW_RANDOM_BCM2835 tristate "Broadcom BCM2835 Random Number Generator support" - depends on ARCH_BCM2835 + depends on ARCH_BCM2835 || ARCH_BCM_NSP || ARCH_BCM_5301X default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number @@ -396,6 +396,20 @@ config HW_RANDOM_PIC32 If unsure, say Y. +config HW_RANDOM_MESON + tristate "Amlogic Meson Random Number Generator support" + depends on HW_RANDOM + depends on ARCH_MESON || COMPILE_TEST + default y + ---help--- + This driver provides kernel-side support for the Random Number + Generator hardware found on Amlogic Meson SoCs. + + To compile this driver as a module, choose M here. the + module will be called meson-rng. + + If unsure, say Y. 
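Both new-style hw_random backends in this pull (the Meson driver this entry enables, whose full source appears further down, and the widened bcm2835-rng read path just below) reduce to the same ->read contract: copy up to max bytes of entropy into buf, honour the wait flag, and return the byte count. A condensed sketch of that contract follows; it is illustrative only, and the FOO_* register names are hypothetical placeholders, not registers of either driver.

#include <linux/hw_random.h>
#include <linux/io.h>

#define FOO_STATUS	0x0	/* assumed: bit 0 set when a word is ready */
#define FOO_DATA	0x4	/* assumed: 32-bit entropy register */

static int foo_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
{
	void __iomem *base = (void __iomem *)rng->priv;
	size_t read = 0;

	/* Copy out whole 32-bit words, never writing past 'max' bytes. */
	while (max - read >= sizeof(u32)) {
		if (!(readl_relaxed(base + FOO_STATUS) & 0x1)) {
			if (!wait || read)
				break;	/* non-blocking, or already have data */
			cpu_relax();
			continue;
		}
		*(u32 *)(buf + read) = readl_relaxed(base + FOO_DATA);
		read += sizeof(u32);
	}

	/* Bytes produced: 0 lets the core retry, a negative value is an error. */
	return read;
}

Returning however many words the hardware actually has ready, rather than always a single word as bcm2835-rng used to, is what lets the core satisfy large reads in far fewer register round-trips.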
+ endif # HW_RANDOM config UML_RANDOM diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index 63022b49f160..04bb0b03356f 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile @@ -34,3 +34,4 @@ obj-$(CONFIG_HW_RANDOM_ST) += st-rng.o obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o obj-$(CONFIG_HW_RANDOM_STM32) += stm32-rng.o obj-$(CONFIG_HW_RANDOM_PIC32) += pic32-rng.o +obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c index 7192ec25f667..af2149273fe0 100644 --- a/drivers/char/hw_random/bcm2835-rng.c +++ b/drivers/char/hw_random/bcm2835-rng.c @@ -19,6 +19,7 @@ #define RNG_CTRL 0x0 #define RNG_STATUS 0x4 #define RNG_DATA 0x8 +#define RNG_INT_MASK 0x10 /* enable rng */ #define RNG_RBGEN 0x1 @@ -26,10 +27,24 @@ /* the initial numbers generated are "less random" so will be discarded */ #define RNG_WARMUP_COUNT 0x40000 +#define RNG_INT_OFF 0x1 + +static void __init nsp_rng_init(void __iomem *base) +{ + u32 val; + + /* mask the interrupt */ + val = readl(base + RNG_INT_MASK); + val |= RNG_INT_OFF; + writel(val, base + RNG_INT_MASK); +} + static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) { void __iomem *rng_base = (void __iomem *)rng->priv; + u32 max_words = max / sizeof(u32); + u32 num_words, count; while ((__raw_readl(rng_base + RNG_STATUS) >> 24) == 0) { if (!wait) @@ -37,8 +52,14 @@ static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max, cpu_relax(); } - *(u32 *)buf = __raw_readl(rng_base + RNG_DATA); - return sizeof(u32); + num_words = readl(rng_base + RNG_STATUS) >> 24; + if (num_words > max_words) + num_words = max_words; + + for (count = 0; count < num_words; count++) + ((u32 *)buf)[count] = readl(rng_base + RNG_DATA); + + return num_words * sizeof(u32); } static struct hwrng bcm2835_rng_ops = { @@ -46,10 +67,19 @@ static struct hwrng bcm2835_rng_ops = { .read = bcm2835_rng_read, }; +static const struct of_device_id bcm2835_rng_of_match[] = { + { .compatible = "brcm,bcm2835-rng"}, + { .compatible = "brcm,bcm-nsp-rng", .data = nsp_rng_init}, + { .compatible = "brcm,bcm5301x-rng", .data = nsp_rng_init}, + {}, +}; + static int bcm2835_rng_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; + void (*rng_setup)(void __iomem *base); + const struct of_device_id *rng_id; void __iomem *rng_base; int err; @@ -61,6 +91,15 @@ static int bcm2835_rng_probe(struct platform_device *pdev) } bcm2835_rng_ops.priv = (unsigned long)rng_base; + rng_id = of_match_node(bcm2835_rng_of_match, np); + if (!rng_id) + return -EINVAL; + + /* Check for rng init function, execute it */ + rng_setup = rng_id->data; + if (rng_setup) + rng_setup(rng_base); + /* set warm-up count & enable */ __raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS); __raw_writel(RNG_RBGEN, rng_base + RNG_CTRL); @@ -90,10 +129,6 @@ static int bcm2835_rng_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id bcm2835_rng_of_match[] = { - { .compatible = "brcm,bcm2835-rng", }, - {}, -}; MODULE_DEVICE_TABLE(of, bcm2835_rng_of_match); static struct platform_driver bcm2835_rng_driver = { diff --git a/drivers/char/hw_random/exynos-rng.c b/drivers/char/hw_random/exynos-rng.c index ed44561ea647..23d358553b21 100644 --- a/drivers/char/hw_random/exynos-rng.c +++ b/drivers/char/hw_random/exynos-rng.c @@ -45,12 +45,12 @@ struct exynos_rng { static u32 exynos_rng_readl(struct exynos_rng *rng, u32 
offset) { - return __raw_readl(rng->mem + offset); + return readl_relaxed(rng->mem + offset); } static void exynos_rng_writel(struct exynos_rng *rng, u32 val, u32 offset) { - __raw_writel(val, rng->mem + offset); + writel_relaxed(val, rng->mem + offset); } static int exynos_rng_configure(struct exynos_rng *exynos_rng) diff --git a/drivers/char/hw_random/meson-rng.c b/drivers/char/hw_random/meson-rng.c new file mode 100644 index 000000000000..0cfd81bcaeac --- /dev/null +++ b/drivers/char/hw_random/meson-rng.c @@ -0,0 +1,131 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright (c) 2016 BayLibre, SAS. + * Author: Neil Armstrong + * Copyright (C) 2014 Amlogic, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * The full GNU General Public License is included in this distribution + * in the file called COPYING. + * + * BSD LICENSE + * + * Copyright (c) 2016 BayLibre, SAS. + * Author: Neil Armstrong + * Copyright (C) 2014 Amlogic, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ #include <linux/err.h> #include <linux/module.h> #include <linux/io.h> #include <linux/platform_device.h> #include <linux/hw_random.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/of.h> + +#define RNG_DATA 0x00 + +struct meson_rng_data { + void __iomem *base; + struct platform_device *pdev; + struct hwrng rng; +}; + +static int meson_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) +{ + struct meson_rng_data *data = + container_of(rng, struct meson_rng_data, rng); + + if (max < sizeof(u32)) + return 0; + + *(u32 *)buf = readl_relaxed(data->base + RNG_DATA); + + return sizeof(u32); +} + +static int meson_rng_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct meson_rng_data *data; + struct resource *res; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->pdev = pdev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->base = devm_ioremap_resource(dev, res); + if (IS_ERR(data->base)) + return PTR_ERR(data->base); + + data->rng.name = pdev->name; + data->rng.read = meson_rng_read; + + platform_set_drvdata(pdev, data); + + return devm_hwrng_register(dev, &data->rng); +} + +static const struct of_device_id meson_rng_of_match[] = { + { .compatible = "amlogic,meson-rng", }, + {}, +}; + +static struct platform_driver meson_rng_driver = { + .probe = meson_rng_probe, + .driver = { + .name = "meson-rng", + .of_match_table = meson_rng_of_match, + }, +}; + +module_platform_driver(meson_rng_driver); + +MODULE_ALIAS("platform:meson-rng"); +MODULE_DESCRIPTION("Meson H/W Random Number Generator driver"); +MODULE_AUTHOR("Lawrence Mok <lawrence.mok@amlogic.com>"); +MODULE_AUTHOR("Neil Armstrong <narmstrong@baylibre.com>"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index 8a1432e8bb80..01d4be2c354b 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -384,7 +384,12 @@ static int omap_rng_probe(struct platform_device *pdev) } pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_get_sync(&pdev->dev); + if (ret) { + dev_err(&pdev->dev, "Failed to runtime_get device: %d\n", ret); + pm_runtime_put_noidle(&pdev->dev); + goto err_ioremap; + } ret = (dev->of_node) ? of_get_omap_rng_device_details(priv, pdev) : get_omap_rng_device_details(priv); @@ -435,8 +440,15 @@ static int __maybe_unused omap_rng_suspend(struct device *dev) static int __maybe_unused omap_rng_resume(struct device *dev) { struct omap_rng_dev *priv = dev_get_drvdata(dev); + int ret; + + ret = pm_runtime_get_sync(dev); + if (ret) { + dev_err(dev, "Failed to runtime_get device: %d\n", ret); + pm_runtime_put_noidle(dev); + return ret; + } - pm_runtime_get_sync(dev); priv->pdata->init(priv); return 0; diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index 92a810648bd0..63d84e6f1891 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -69,8 +69,12 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) } /* If error detected or data not ready...
*/ - if (sr != RNG_SR_DRDY) + if (sr != RNG_SR_DRDY) { + if (WARN_ONCE(sr & (RNG_SR_SEIS | RNG_SR_CEIS), + "bad RNG status - %x\n", sr)) + writel_relaxed(0, priv->base + RNG_SR); break; + } *(u32 *)data = readl_relaxed(priv->base + RNG_DR); @@ -79,10 +83,6 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) max -= sizeof(u32); } - if (WARN_ONCE(sr & (RNG_SR_SEIS | RNG_SR_CEIS), - "bad RNG status - %x\n", sr)) - writel_relaxed(0, priv->base + RNG_SR); - pm_runtime_mark_last_busy((struct device *) priv->rng.priv); pm_runtime_put_sync_autosuspend((struct device *) priv->rng.priv); diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig index 6ed9e9fe5233..5a9350b1069a 100644 --- a/drivers/char/ipmi/Kconfig +++ b/drivers/char/ipmi/Kconfig @@ -50,18 +50,6 @@ config IPMI_SI Currently, only KCS and SMIC are supported. If you are using IPMI, you should probably say "y" here. -config IPMI_SI_PROBE_DEFAULTS - bool 'Probe for all possible IPMI system interfaces by default' - default n - depends on IPMI_SI - help - Modern systems will usually expose IPMI interfaces via a discoverable - firmware mechanism such as ACPI or DMI. Older systems do not, and so - the driver is forced to probe hardware manually. This may cause boot - delays. Say "n" here to disable this manual probing. IPMI will then - only be available on older systems if the "ipmi_si_intf.trydefaults=1" - boot argument is passed. - config IPMI_SSIF tristate 'IPMI SMBus handler (SSIF)' select I2C diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 44b1bd6baa38..d8619998cfb5 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -474,12 +474,12 @@ static DEFINE_MUTEX(smi_watchers_mutex); static const char * const addr_src_to_str[] = { "invalid", "hotmod", "hardcoded", "SPMI", "ACPI", "SMBIOS", "PCI", - "device-tree", "default" + "device-tree" }; const char *ipmi_addr_src_to_str(enum ipmi_addr_src src) { - if (src > SI_DEFAULT) + if (src >= SI_LAST) src = 0; /* Invalid */ return addr_src_to_str[src]; } diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 7b1c412b40a2..a112c0146012 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -1322,7 +1322,6 @@ static bool si_tryplatform = true; #ifdef CONFIG_PCI static bool si_trypci = true; #endif -static bool si_trydefaults = IS_ENABLED(CONFIG_IPMI_SI_PROBE_DEFAULTS); static char *si_type[SI_MAX_PARMS]; #define MAX_SI_TYPE_STR 30 static char si_type_str[MAX_SI_TYPE_STR]; @@ -1371,10 +1370,6 @@ module_param_named(trypci, si_trypci, bool, 0); MODULE_PARM_DESC(trypci, "Setting this to zero will disable the" " default scan of the interfaces identified via pci"); #endif -module_param_named(trydefaults, si_trydefaults, bool, 0); -MODULE_PARM_DESC(trydefaults, "Setting this to 'false' will disable the" - " default scan of the KCS and SMIC interface at the standard" - " address"); module_param_string(type, si_type_str, MAX_SI_TYPE_STR, 0); MODULE_PARM_DESC(type, "Defines the type of each interface, each" " interface separated by commas. 
The types are 'kcs'," @@ -3461,62 +3456,6 @@ static inline void wait_for_timer_and_thread(struct smi_info *smi_info) del_timer_sync(&smi_info->si_timer); } -static const struct ipmi_default_vals -{ - const int type; - const int port; -} ipmi_defaults[] = -{ - { .type = SI_KCS, .port = 0xca2 }, - { .type = SI_SMIC, .port = 0xca9 }, - { .type = SI_BT, .port = 0xe4 }, - { .port = 0 } -}; - -static void default_find_bmc(void) -{ - struct smi_info *info; - int i; - - for (i = 0; ; i++) { - if (!ipmi_defaults[i].port) - break; -#ifdef CONFIG_PPC - if (check_legacy_ioport(ipmi_defaults[i].port)) - continue; -#endif - info = smi_info_alloc(); - if (!info) - return; - - info->addr_source = SI_DEFAULT; - - info->si_type = ipmi_defaults[i].type; - info->io_setup = port_setup; - info->io.addr_data = ipmi_defaults[i].port; - info->io.addr_type = IPMI_IO_ADDR_SPACE; - - info->io.addr = NULL; - info->io.regspacing = DEFAULT_REGSPACING; - info->io.regsize = DEFAULT_REGSPACING; - info->io.regshift = 0; - - if (add_smi(info) == 0) { - if ((try_smi_init(info)) == 0) { - /* Found one... */ - printk(KERN_INFO PFX "Found default %s" - " state machine at %s address 0x%lx\n", - si_to_str[info->si_type], - addr_space_to_str[info->io.addr_type], - info->io.addr_data); - } else - cleanup_one_si(info); - } else { - kfree(info); - } - } -} - static int is_new_interface(struct smi_info *info) { struct smi_info *e; @@ -3844,8 +3783,6 @@ static int init_ipmi_si(void) #ifdef CONFIG_PARISC register_parisc_driver(&ipmi_parisc_driver); parisc_registered = true; - /* poking PC IO addresses will crash machine, don't do it */ - si_trydefaults = 0; #endif /* We prefer devices with interrupts, but in the case of a machine @@ -3885,16 +3822,6 @@ static int init_ipmi_si(void) if (type) return 0; - if (si_trydefaults) { - mutex_lock(&smi_infos_lock); - if (list_empty(&smi_infos)) { - /* No BMC was found, try defaults. */ - mutex_unlock(&smi_infos_lock); - default_find_bmc(); - } else - mutex_unlock(&smi_infos_lock); - } - mutex_lock(&smi_infos_lock); if (unload_when_empty && list_empty(&smi_infos)) { mutex_unlock(&smi_infos_lock); diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 71025c2f6bbb..a33163dbb913 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -66,12 +67,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) u64 cursor = from; while (cursor < to) { - if (!devmem_is_allowed(pfn)) { - printk(KERN_INFO - "Program %s tried to access /dev/mem between %Lx->%Lx.\n", - current->comm, from, to); + if (!devmem_is_allowed(pfn)) return 0; - } cursor += PAGE_SIZE; pfn++; } @@ -661,6 +658,28 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma) return 0; } +static unsigned long get_unmapped_area_zero(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ +#ifdef CONFIG_MMU + if (flags & MAP_SHARED) { + /* + * mmap_zero() will call shmem_zero_setup() to create a file, + * so use shmem's get_unmapped_area in case it can be huge; + * and pass NULL for file as in mmap.c's get_unmapped_area(), + * so as not to confuse shmem with our handle on "/dev/zero". 
+ */ + return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags); + } + + /* Otherwise flags & MAP_PRIVATE: with no shmem object beneath it */ + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); +#else + return -ENOSYS; +#endif +} + static ssize_t write_full(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -768,6 +787,7 @@ static const struct file_operations zero_fops = { .read_iter = read_iter_zero, .write_iter = write_iter_zero, .mmap = mmap_zero, + .get_unmapped_area = get_unmapped_area_zero, #ifndef CONFIG_MMU .mmap_capabilities = zero_mmap_capabilities, #endif diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c index 10f846cc8db1..25d5906640c3 100644 --- a/drivers/clk/at91/clk-programmable.c +++ b/drivers/clk/at91/clk-programmable.c @@ -99,7 +99,7 @@ static int clk_programmable_set_parent(struct clk_hw *hw, u8 index) struct clk_programmable *prog = to_clk_programmable(hw); const struct clk_programmable_layout *layout = prog->layout; unsigned int mask = layout->css_mask; - unsigned int pckr = 0; + unsigned int pckr = index; if (layout->have_slck_mck) mask |= AT91_PMC_CSSMCK_MCK; diff --git a/drivers/clk/clk-oxnas.c b/drivers/clk/clk-oxnas.c index efba7d4dbcfc..79bcb2e42060 100644 --- a/drivers/clk/clk-oxnas.c +++ b/drivers/clk/clk-oxnas.c @@ -144,9 +144,9 @@ static int oxnas_stdclk_probe(struct platform_device *pdev) return -ENOMEM; regmap = syscon_node_to_regmap(of_get_parent(np)); - if (!regmap) { + if (IS_ERR(regmap)) { dev_err(&pdev->dev, "failed to have parent regmap\n"); - return -EINVAL; + return PTR_ERR(regmap); } for (i = 0; i < ARRAY_SIZE(clk_oxnas_init); i++) { diff --git a/drivers/clk/rockchip/clk-cpu.c b/drivers/clk/rockchip/clk-cpu.c index 4bb130cd0062..05b3d73bfefa 100644 --- a/drivers/clk/rockchip/clk-cpu.c +++ b/drivers/clk/rockchip/clk-cpu.c @@ -321,9 +321,9 @@ struct clk *rockchip_clk_register_cpuclk(const char *name, } cclk = clk_register(NULL, &cpuclk->hw); - if (IS_ERR(clk)) { + if (IS_ERR(cclk)) { pr_err("%s: could not register cpuclk %s\n", __func__, name); - ret = PTR_ERR(clk); + ret = PTR_ERR(cclk); goto free_rate_table; } diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c index bc856f21f6b2..077fcdc7908b 100644 --- a/drivers/clk/rockchip/clk-mmc-phase.c +++ b/drivers/clk/rockchip/clk-mmc-phase.c @@ -41,8 +41,6 @@ static unsigned long rockchip_mmc_recalc(struct clk_hw *hw, #define ROCKCHIP_MMC_DEGREE_MASK 0x3 #define ROCKCHIP_MMC_DELAYNUM_OFFSET 2 #define ROCKCHIP_MMC_DELAYNUM_MASK (0xff << ROCKCHIP_MMC_DELAYNUM_OFFSET) -#define ROCKCHIP_MMC_INIT_STATE_RESET 0x1 -#define ROCKCHIP_MMC_INIT_STATE_SHIFT 1 #define PSECS_PER_SEC 1000000000000LL @@ -154,6 +152,7 @@ struct clk *rockchip_clk_register_mmc(const char *name, return ERR_PTR(-ENOMEM); init.name = name; + init.flags = 0; init.num_parents = num_parents; init.parent_names = parent_names; init.ops = &rockchip_mmc_clk_ops; @@ -162,15 +161,6 @@ struct clk *rockchip_clk_register_mmc(const char *name, mmc_clock->reg = reg; mmc_clock->shift = shift; - /* - * Assert init_state to soft reset the CLKGEN - * for mmc tuning phase and degree - */ - if (mmc_clock->shift == ROCKCHIP_MMC_INIT_STATE_SHIFT) - writel(HIWORD_UPDATE(ROCKCHIP_MMC_INIT_STATE_RESET, - ROCKCHIP_MMC_INIT_STATE_RESET, - mmc_clock->shift), mmc_clock->reg); - clk = clk_register(NULL, &mmc_clock->hw); if (IS_ERR(clk)) kfree(mmc_clock); diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index 
291543f52caa..8059a8d3ea36 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -832,9 +832,9 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = { RK3399_CLKGATE_CON(13), 1, GFLAGS), /* perihp */ - GATE(0, "cpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED, + GATE(0, "cpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(5), 0, GFLAGS), - GATE(0, "gpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED, + GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(5), 1, GFLAGS), COMPOSITE(ACLK_PERIHP, "aclk_perihp", mux_aclk_perihp_p, CLK_IGNORE_UNUSED, RK3399_CLKSEL_CON(14), 7, 1, MFLAGS, 0, 5, DFLAGS, @@ -1466,6 +1466,8 @@ static struct rockchip_clk_branch rk3399_clk_pmu_branches[] __initdata = { static const char *const rk3399_cru_critical_clocks[] __initconst = { "aclk_cci_pre", + "aclk_gic", + "aclk_gic_noc", "pclk_perilp0", "pclk_perilp0", "hclk_perilp0", @@ -1508,6 +1510,7 @@ static void __init rk3399_clk_init(struct device_node *np) ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); if (IS_ERR(ctx)) { pr_err("%s: rockchip clk init failed\n", __func__); + iounmap(reg_base); return; } @@ -1553,6 +1556,7 @@ static void __init rk3399_pmu_clk_init(struct device_node *np) ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS); if (IS_ERR(ctx)) { pr_err("%s: rockchip pmu clk init failed\n", __func__); + iounmap(reg_base); return; } diff --git a/drivers/clk/sunxi/clk-sun4i-display.c b/drivers/clk/sunxi/clk-sun4i-display.c index 445a7498d6df..9780fac6d029 100644 --- a/drivers/clk/sunxi/clk-sun4i-display.c +++ b/drivers/clk/sunxi/clk-sun4i-display.c @@ -33,6 +33,8 @@ struct sun4i_a10_display_clk_data { u8 width_div; u8 width_mux; + + u32 flags; }; struct reset_data { @@ -166,7 +168,7 @@ static void __init sun4i_a10_display_init(struct device_node *node, data->has_div ? &div->hw : NULL, data->has_div ? &clk_divider_ops : NULL, &gate->hw, &clk_gate_ops, - 0); + data->flags); if (IS_ERR(clk)) { pr_err("%s: Couldn't register the clock\n", clk_name); goto free_div; @@ -232,6 +234,7 @@ static const struct sun4i_a10_display_clk_data sun4i_a10_tcon_ch0_data __initcon .offset_rst = 29, .offset_mux = 24, .width_mux = 2, + .flags = CLK_SET_RATE_PARENT, }; static void __init sun4i_a10_tcon_ch0_setup(struct device_node *node) diff --git a/drivers/clk/sunxi/clk-sun4i-tcon-ch1.c b/drivers/clk/sunxi/clk-sun4i-tcon-ch1.c index 98a4582de56a..b6d29d1bedca 100644 --- a/drivers/clk/sunxi/clk-sun4i-tcon-ch1.c +++ b/drivers/clk/sunxi/clk-sun4i-tcon-ch1.c @@ -79,15 +79,11 @@ static int tcon_ch1_is_enabled(struct clk_hw *hw) static u8 tcon_ch1_get_parent(struct clk_hw *hw) { struct tcon_ch1_clk *tclk = hw_to_tclk(hw); - int num_parents = clk_hw_get_num_parents(hw); u32 reg; reg = readl(tclk->reg) >> TCON_CH1_SCLK2_MUX_SHIFT; reg &= reg >> TCON_CH1_SCLK2_MUX_MASK; - if (reg >= num_parents) - return -EINVAL; - return reg; } diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 47352d25c15e..567788664723 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -27,6 +27,20 @@ config CLKBLD_I8253 config CLKSRC_MMIO bool +config BCM2835_TIMER + bool "BCM2835 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables the support for the BCM2835 timer driver. + +config BCM_KONA_TIMER + bool "BCM mobile timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables the support for the BCM Kona mobile timer driver. 
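Two of the clk fixes above, the clk-oxnas syscon regmap check and the rockchip cpuclk registration, are the same bug class: code that tested a regmap pointer for NULL when the API encodes failure as an error pointer, or that ran IS_ERR() on the wrong variable. The idiom, condensed into a sketch (foo_register() is a hypothetical caller, not code from this commit):

#include <linux/clk-provider.h>
#include <linux/err.h>

static int foo_register(struct clk_hw *hw)
{
	struct clk *cclk = clk_register(NULL, hw);

	if (IS_ERR(cclk))		/* test the handle just returned... */
		return PTR_ERR(cclk);	/* ...and propagate its encoded errno */

	return 0;
}

Both original bugs compiled cleanly, since IS_ERR() accepts any pointer; only checking the freshly returned handle (cclk, regmap) catches the failure at runtime.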
+ config DIGICOLOR_TIMER bool "Digicolor timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS @@ -141,6 +155,72 @@ config CLKSRC_DBX500_PRCMU help Use the always on PRCMU Timer as clocksource +config CLPS711X_TIMER + bool "Cirrus logic timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Cirrus Logic PS711 timer. + +config ATLAS7_TIMER + bool "Atlas7 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Atlas7 timer. + +config MOXART_TIMER + bool "Moxart timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Moxart timer. + +config MXS_TIMER + bool "Mxs timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + select STMP_DEVICE + help + Enables support for the Mxs timer. + +config PRIMA2_TIMER + bool "Prima2 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Prima2 timer. + +config U300_TIMER + bool "U300 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + depends on ARM + select CLKSRC_MMIO + help + Enables support for the U300 timer. + +config NSPIRE_TIMER + bool "NSpire timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Nspire timer. + +config KEYSTONE_TIMER + bool "Keystone timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + depends on ARM || ARM64 + select CLKSRC_MMIO + help + Enables support for the Keystone timer. + +config INTEGRATOR_AP_TIMER + bool "Integrator-ap timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Integrator-ap timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) @@ -208,14 +288,16 @@ config ARM_ARCH_TIMER select CLKSRC_ACPI if ACPI config ARM_ARCH_TIMER_EVTSTREAM - bool "Support for ARM architected timer event stream generation" + bool "Enable ARM architected timer event stream generation by default" default y if ARM_ARCH_TIMER depends on ARM_ARCH_TIMER help - This option enables support for event stream generation based on - the ARM architected timer. It is used for waking up CPUs executing - the wfe instruction at a frequency represented as a power-of-2 - divisor of the clock rate. + This option enables support by default for event stream generation + based on the ARM architected timer. It is used for waking up CPUs + executing the wfe instruction at a frequency represented as a + power-of-2 divisor of the clock rate. The behaviour can also be + overridden on the command line using the + clocksource.arm_arch_timer.evtstream parameter. The main use of the event stream is wfe-based timeouts of userspace locking implementations. It might also be useful for imposing timeout on wfe to safeguard against any programming errors in case an expected @@ -224,8 +306,9 @@ config ARM_ARCH_TIMER_EVTSTREAM hardware anomalies of missing events. 
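The command-line override this help text mentions is wired up in the arm_arch_timer.c hunk further down, via an early_param() parsed with strtobool(). Note that the name actually registered in the code is "clocksource.arm_arch_timer.evtstrm", not the "evtstream" spelling used above. Condensed from that hunk:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/string.h>

static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM);

static int __init early_evtstrm_cfg(char *buf)
{
	/* strtobool() accepts "0"/"1", "y"/"n" and "on"/"off" spellings. */
	return strtobool(buf, &evtstrm_enable);
}
early_param("clocksource.arm_arch_timer.evtstrm", early_evtstrm_cfg);

Booting with clocksource.arm_arch_timer.evtstrm=0 therefore suppresses the event stream even on a kernel built with CONFIG_ARM_ARCH_TIMER_EVTSTREAM=y, since arch_timer_setup() now consults evtstrm_enable instead of the compile-time constant.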
config ARM_GLOBAL_TIMER - bool + bool "Support for the ARM global timer" if COMPILE_TEST select CLKSRC_OF if OF + depends on ARM help This options enables support for the ARM global timer unit @@ -243,7 +326,7 @@ config CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK Use ARM global timer clock source as sched_clock config ARMV7M_SYSTICK - bool + bool "Support for the ARMv7M system time" if COMPILE_TEST select CLKSRC_OF if OF select CLKSRC_MMIO help @@ -254,9 +337,12 @@ config ATMEL_PIT def_bool SOC_AT91SAM9 || SOC_SAMA5 config ATMEL_ST - bool + bool "Atmel ST timer support" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS select CLKSRC_OF select MFD_SYSCON + help + Support for the Atmel ST timer. config CLKSRC_METAG_GENERIC def_bool y if METAG @@ -270,7 +356,7 @@ config CLKSRC_EXYNOS_MCT Support for Multi Core Timer controller on Exynos SoCs. config CLKSRC_SAMSUNG_PWM - bool "PWM timer drvier for Samsung S3C, S5P" if COMPILE_TEST + bool "PWM timer driver for Samsung S3C, S5P" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS depends on HAS_IOMEM help @@ -293,6 +379,14 @@ config VF_PIT_TIMER help Support for Period Interrupt Timer on Freescale Vybrid Family SoCs. +config OXNAS_RPS_TIMER + bool "Oxford Semiconductor OXNAS RPS Timers driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_OF + select CLKSRC_MMIO + help + This enables support for the Oxford Semiconductor OXNAS RPS timers. + config SYS_SUPPORTS_SH_CMT bool @@ -361,8 +455,8 @@ config CLKSRC_QCOM Qualcomm SoCs. config CLKSRC_VERSATILE - bool "ARM Versatile (Express) reference platforms clock source" - depends on PLAT_VERSATILE && GENERIC_SCHED_CLOCK && !ARCH_USES_GETTIMEOFFSET + bool "ARM Versatile (Express) reference platforms clock source" if COMPILE_TEST + depends on GENERIC_SCHED_CLOCK && !ARCH_USES_GETTIMEOFFSET select CLKSRC_OF default y if MFD_VEXPRESS_SYSREG help diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 473974f9590a..fd9d6df0bbc0 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -19,21 +19,21 @@ obj-$(CONFIG_CLKSRC_NOMADIK_MTU) += nomadik-mtu.o obj-$(CONFIG_CLKSRC_DBX500_PRCMU) += clksrc-dbx500-prcmu.o obj-$(CONFIG_ARMADA_370_XP_TIMER) += time-armada-370-xp.o obj-$(CONFIG_ORION_TIMER) += time-orion.o -obj-$(CONFIG_ARCH_BCM2835) += bcm2835_timer.o -obj-$(CONFIG_ARCH_CLPS711X) += clps711x-timer.o -obj-$(CONFIG_ARCH_ATLAS7) += timer-atlas7.o -obj-$(CONFIG_ARCH_MOXART) += moxart_timer.o -obj-$(CONFIG_ARCH_MXS) += mxs_timer.o +obj-$(CONFIG_BCM2835_TIMER) += bcm2835_timer.o +obj-$(CONFIG_CLPS711X_TIMER) += clps711x-timer.o +obj-$(CONFIG_ATLAS7_TIMER) += timer-atlas7.o +obj-$(CONFIG_MOXART_TIMER) += moxart_timer.o +obj-$(CONFIG_MXS_TIMER) += mxs_timer.o obj-$(CONFIG_CLKSRC_PXA) += pxa_timer.o -obj-$(CONFIG_ARCH_PRIMA2) += timer-prima2.o -obj-$(CONFIG_ARCH_U300) += timer-u300.o +obj-$(CONFIG_PRIMA2_TIMER) += timer-prima2.o +obj-$(CONFIG_U300_TIMER) += timer-u300.o obj-$(CONFIG_SUN4I_TIMER) += sun4i_timer.o obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o obj-$(CONFIG_MESON6_TIMER) += meson6_timer.o obj-$(CONFIG_TEGRA_TIMER) += tegra20_timer.o obj-$(CONFIG_VT8500_TIMER) += vt8500_timer.o -obj-$(CONFIG_ARCH_NSPIRE) += zevio-timer.o -obj-$(CONFIG_ARCH_BCM_MOBILE) += bcm_kona_timer.o +obj-$(CONFIG_NSPIRE_TIMER) += zevio-timer.o +obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o obj-$(CONFIG_CADENCE_TTC_TIMER) += cadence_ttc_timer.o obj-$(CONFIG_CLKSRC_EFM32) += time-efm32.o obj-$(CONFIG_CLKSRC_STM32) += timer-stm32.o @@ -48,6 +48,7 @@ obj-$(CONFIG_MTK_TIMER) += 
mtk_timer.o obj-$(CONFIG_CLKSRC_PISTACHIO) += time-pistachio.o obj-$(CONFIG_CLKSRC_TI_32K) += timer-ti-32k.o obj-$(CONFIG_CLKSRC_NPS) += timer-nps.o +obj-$(CONFIG_OXNAS_RPS_TIMER) += timer-oxnas-rps.o obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o obj-$(CONFIG_ARM_GLOBAL_TIMER) += arm_global_timer.o @@ -55,8 +56,8 @@ obj-$(CONFIG_ARMV7M_SYSTICK) += armv7m_systick.o obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp804.o obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST) += dummy_timer.o -obj-$(CONFIG_ARCH_KEYSTONE) += timer-keystone.o -obj-$(CONFIG_ARCH_INTEGRATOR_AP) += timer-integrator-ap.o +obj-$(CONFIG_KEYSTONE_TIMER) += timer-keystone.o +obj-$(CONFIG_INTEGRATOR_AP_TIMER) += timer-integrator-ap.o obj-$(CONFIG_CLKSRC_VERSATILE) += versatile.o obj-$(CONFIG_CLKSRC_MIPS_GIC) += mips-gic-timer.o obj-$(CONFIG_CLKSRC_TANGO_XTAL) += tango_xtal.o diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 4814446a0024..5effd3027319 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -79,6 +79,14 @@ static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI; static bool arch_timer_c3stop; static bool arch_timer_mem_use_virtual; +static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM); + +static int __init early_evtstrm_cfg(char *buf) +{ + return strtobool(buf, &evtstrm_enable); +} +early_param("clocksource.arm_arch_timer.evtstrm", early_evtstrm_cfg); + /* * Architected system timer support. */ @@ -372,7 +380,7 @@ static int arch_timer_setup(struct clock_event_device *clk) enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0); arch_counter_set_user_access(); - if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM)) + if (evtstrm_enable) arch_timer_configure_evtstream(); return 0; @@ -693,25 +701,26 @@ arch_timer_needs_probing(int type, const struct of_device_id *matches) return needs_probing; } -static void __init arch_timer_common_init(void) +static int __init arch_timer_common_init(void) { unsigned mask = ARCH_CP15_TIMER | ARCH_MEM_TIMER; /* Wait until both nodes are probed if we have two timers */ if ((arch_timers_present & mask) != mask) { if (arch_timer_needs_probing(ARCH_MEM_TIMER, arch_timer_mem_of_match)) - return; + return 0; if (arch_timer_needs_probing(ARCH_CP15_TIMER, arch_timer_of_match)) - return; + return 0; } arch_timer_banner(arch_timers_present); arch_counter_register(arch_timers_present); - arch_timer_arch_init(); + return arch_timer_arch_init(); } -static void __init arch_timer_init(void) +static int __init arch_timer_init(void) { + int ret; /* * If HYP mode is available, we know that the physical timer * has been configured to be accessible from PL1. 
Use it, so @@ -739,23 +748,30 @@ static void __init arch_timer_init(void) if (!has_ppi) { pr_warn("arch_timer: No interrupt available, giving up\n"); - return; + return -EINVAL; } } - arch_timer_register(); - arch_timer_common_init(); + ret = arch_timer_register(); + if (ret) + return ret; + + ret = arch_timer_common_init(); + if (ret) + return ret; arch_timer_kvm_info.virtual_irq = arch_timer_ppi[VIRT_PPI]; + + return 0; } -static void __init arch_timer_of_init(struct device_node *np) +static int __init arch_timer_of_init(struct device_node *np) { int i; if (arch_timers_present & ARCH_CP15_TIMER) { pr_warn("arch_timer: multiple nodes in dt, skipping\n"); - return; + return 0; } arch_timers_present |= ARCH_CP15_TIMER; @@ -774,23 +790,23 @@ static void __init arch_timer_of_init(struct device_node *np) of_property_read_bool(np, "arm,cpu-registers-not-fw-configured")) arch_timer_uses_ppi = PHYS_SECURE_PPI; - arch_timer_init(); + return arch_timer_init(); } CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init); CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init); -static void __init arch_timer_mem_init(struct device_node *np) +static int __init arch_timer_mem_init(struct device_node *np) { struct device_node *frame, *best_frame = NULL; void __iomem *cntctlbase, *base; - unsigned int irq; + unsigned int irq, ret = -EINVAL; u32 cnttidr; arch_timers_present |= ARCH_MEM_TIMER; cntctlbase = of_iomap(np, 0); if (!cntctlbase) { pr_err("arch_timer: Can't find CNTCTLBase\n"); - return; + return -ENXIO; } cnttidr = readl_relaxed(cntctlbase + CNTTIDR); @@ -830,6 +846,7 @@ static void __init arch_timer_mem_init(struct device_node *np) best_frame = of_node_get(frame); } + ret = -ENXIO; base = arch_counter_base = of_iomap(best_frame, 0); if (!base) { pr_err("arch_timer: Can't map frame's registers\n"); @@ -841,6 +858,7 @@ static void __init arch_timer_mem_init(struct device_node *np) else irq = irq_of_parse_and_map(best_frame, 0); + ret = -EINVAL; if (!irq) { pr_err("arch_timer: Frame missing %s irq", arch_timer_mem_use_virtual ?
"virt" : "phys"); @@ -848,11 +866,15 @@ } arch_timer_detect_rate(base, np); - arch_timer_mem_register(base, irq); - arch_timer_common_init(); + ret = arch_timer_mem_register(base, irq); + if (ret) + goto out; + + return arch_timer_common_init(); out: iounmap(cntctlbase); of_node_put(best_frame); + return ret; } CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem", arch_timer_mem_init); diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 9df0d1699d22..2a9ceb6e93f9 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -238,7 +238,7 @@ static void __init gt_delay_timer_init(void) register_current_timer_delay(&gt_delay_timer); } -static void __init gt_clocksource_init(void) +static int __init gt_clocksource_init(void) { writel(0, gt_base + GT_CONTROL); writel(0, gt_base + GT_COUNTER0); @@ -249,7 +249,7 @@ static void __init gt_clocksource_init(void) #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK sched_clock_register(gt_sched_clock_read, 64, gt_clk_rate); #endif - clocksource_register_hz(&gt_clocksource, gt_clk_rate); + return clocksource_register_hz(&gt_clocksource, gt_clk_rate); } static int gt_cpu_notify(struct notifier_block *self, unsigned long action, @@ -270,7 +270,7 @@ static struct notifier_block gt_cpu_nb = { .notifier_call = gt_cpu_notify, }; -static void __init global_timer_of_register(struct device_node *np) +static int __init global_timer_of_register(struct device_node *np) { struct clk *gt_clk; int err = 0; @@ -283,19 +283,19 @@ static void __init global_timer_of_register(struct device_node *np) if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9 && (read_cpuid_id() & 0xf0000f) < 0x200000) { pr_warn("global-timer: non support for this cpu version.\n"); - return; + return -ENOSYS; } gt_ppi = irq_of_parse_and_map(np, 0); if (!gt_ppi) { pr_warn("global-timer: unable to parse irq\n"); - return; + return -EINVAL; } gt_base = of_iomap(np, 0); if (!gt_base) { pr_warn("global-timer: invalid base address\n"); - return; + return -ENXIO; } gt_clk = of_clk_get(np, 0); @@ -332,11 +332,17 @@ static void __init global_timer_of_register(struct device_node *np) } /* Immediately configure the timer on the boot CPU */ - gt_clocksource_init(); - gt_clockevents_init(this_cpu_ptr(gt_evt)); + err = gt_clocksource_init(); + if (err) + goto out_irq; + + err = gt_clockevents_init(this_cpu_ptr(gt_evt)); + if (err) + goto out_irq; + gt_delay_timer_init(); - return; + return 0; out_irq: free_percpu_irq(gt_ppi, gt_evt); @@ -347,6 +353,8 @@ out_clk: out_unmap: iounmap(gt_base); WARN(err, "ARM Global timer register failed (%d)\n", err); + + return err; } /* Only tested on r2p2 and r3p0 */ diff --git a/drivers/clocksource/armv7m_systick.c b/drivers/clocksource/armv7m_systick.c index addfd2c64f54..a315491b7047 100644 --- a/drivers/clocksource/armv7m_systick.c +++ b/drivers/clocksource/armv7m_systick.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -21,7 +22,7 @@ #define SYSTICK_LOAD_RELOAD_MASK 0x00FFFFFF -static void __init system_timer_of_register(struct device_node *np) +static int __init system_timer_of_register(struct device_node *np) { struct clk *clk = NULL; void __iomem *base; @@ -31,22 +32,26 @@ static void __init system_timer_of_register(struct device_node *np) base = of_iomap(np, 0); if (!base) { pr_warn("system-timer: invalid base address\n"); - return; + return -ENXIO; } ret = of_property_read_u32(np,
"clock-frequency", &rate); if (ret) { clk = of_clk_get(np, 0); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + ret = PTR_ERR(clk); goto out_unmap; + } ret = clk_prepare_enable(clk); if (ret) goto out_clk_put; rate = clk_get_rate(clk); - if (!rate) + if (!rate) { + ret = -EINVAL; goto out_clk_disable; + } } writel_relaxed(SYSTICK_LOAD_RELOAD_MASK, base + SYST_RVR); @@ -64,7 +69,7 @@ static void __init system_timer_of_register(struct device_node *np) pr_info("ARM System timer initialized as clocksource\n"); - return; + return 0; out_clk_disable: clk_disable_unprepare(clk); @@ -73,6 +78,8 @@ out_clk_put: out_unmap: iounmap(base); pr_warn("ARM System timer register failed (%d)\n", ret); + + return ret; } CLOCKSOURCE_OF_DECLARE(arm_systick, "arm,armv7m-systick", diff --git a/drivers/clocksource/asm9260_timer.c b/drivers/clocksource/asm9260_timer.c index 217438d39eb3..1ba871b7fe11 100644 --- a/drivers/clocksource/asm9260_timer.c +++ b/drivers/clocksource/asm9260_timer.c @@ -184,7 +184,7 @@ static irqreturn_t asm9260_timer_interrupt(int irq, void *dev_id) * Timer initialization * --------------------------------------------------------------------------- */ -static void __init asm9260_timer_init(struct device_node *np) +static int __init asm9260_timer_init(struct device_node *np) { int irq; struct clk *clk; @@ -192,20 +192,26 @@ static void __init asm9260_timer_init(struct device_node *np) unsigned long rate; priv.base = of_io_request_and_map(np, 0, np->name); - if (IS_ERR(priv.base)) - panic("%s: unable to map resource", np->name); + if (IS_ERR(priv.base)) { + pr_err("%s: unable to map resource", np->name); + return PTR_ERR(priv.base); + } clk = of_clk_get(np, 0); ret = clk_prepare_enable(clk); - if (ret) - panic("Failed to enable clk!\n"); + if (ret) { + pr_err("Failed to enable clk!\n"); + return ret; + } irq = irq_of_parse_and_map(np, 0); ret = request_irq(irq, asm9260_timer_interrupt, IRQF_TIMER, DRIVER_NAME, &event_dev); - if (ret) - panic("Failed to setup irq!\n"); + if (ret) { + pr_err("Failed to setup irq!\n"); + return ret; + } /* set all timers for count-up */ writel_relaxed(BM_DIR_DEFAULT, priv.base + HW_DIR); @@ -229,6 +235,8 @@ static void __init asm9260_timer_init(struct device_node *np) priv.ticks_per_jiffy = DIV_ROUND_CLOSEST(rate, HZ); event_dev.cpumask = cpumask_of(0); clockevents_config_and_register(&event_dev, rate, 0x2c00, 0xfffffffe); + + return 0; } CLOCKSOURCE_OF_DECLARE(asm9260_timer, "alphascale,asm9260-timer", asm9260_timer_init); diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index 6f2822928963..e71acf231c89 100644 --- a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -80,19 +80,24 @@ static irqreturn_t bcm2835_time_interrupt(int irq, void *dev_id) } } -static void __init bcm2835_timer_init(struct device_node *node) +static int __init bcm2835_timer_init(struct device_node *node) { void __iomem *base; u32 freq; - int irq; + int irq, ret; struct bcm2835_timer *timer; base = of_iomap(node, 0); - if (!base) - panic("Can't remap registers"); + if (!base) { + pr_err("Can't remap registers"); + return -ENXIO; + } - if (of_property_read_u32(node, "clock-frequency", &freq)) - panic("Can't read clock-frequency"); + ret = of_property_read_u32(node, "clock-frequency", &freq); + if (ret) { + pr_err("Can't read clock-frequency"); + return ret; + } system_clock = base + REG_COUNTER_LO; sched_clock_register(bcm2835_sched_read, 32, freq); @@ -101,12 +106,16 @@ static void __init bcm2835_timer_init(struct device_node 
*node) freq, 300, 32, clocksource_mmio_readl_up); irq = irq_of_parse_and_map(node, DEFAULT_TIMER); - if (irq <= 0) - panic("Can't parse IRQ"); + if (irq <= 0) { + pr_err("Can't parse IRQ"); + return -EINVAL; + } timer = kzalloc(sizeof(*timer), GFP_KERNEL); - if (!timer) - panic("Can't allocate timer struct\n"); + if (!timer) { + pr_err("Can't allocate timer struct\n"); + return -ENOMEM; + } timer->control = base + REG_CONTROL; timer->compare = base + REG_COMPARE(DEFAULT_TIMER); @@ -121,12 +130,17 @@ static void __init bcm2835_timer_init(struct device_node *node) timer->act.dev_id = timer; timer->act.handler = bcm2835_time_interrupt; - if (setup_irq(irq, &timer->act)) - panic("Can't set up timer IRQ\n"); + ret = setup_irq(irq, &timer->act); + if (ret) { + pr_err("Can't set up timer IRQ\n"); + return ret; + } clockevents_config_and_register(&timer->evt, freq, 0xf, 0xffffffff); pr_info("bcm2835: system timer (irq = %d)\n", irq); + + return 0; } CLOCKSOURCE_OF_DECLARE(bcm2835, "brcm,bcm2835-system-timer", bcm2835_timer_init); diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c index e717e87df9bc..7e3fd375a627 100644 --- a/drivers/clocksource/bcm_kona_timer.c +++ b/drivers/clocksource/bcm_kona_timer.c @@ -20,7 +20,6 @@ #include #include -#include #include #include @@ -163,16 +162,11 @@ static struct irqaction kona_timer_irq = { .handler = kona_timer_interrupt, }; -static void __init kona_timer_init(struct device_node *node) +static int __init kona_timer_init(struct device_node *node) { u32 freq; struct clk *external_clk; - if (!of_device_is_available(node)) { - pr_info("Kona Timer v1 marked as disabled in device tree\n"); - return; - } - external_clk = of_clk_get_by_name(node, NULL); if (!IS_ERR(external_clk)) { @@ -182,7 +176,7 @@ static void __init kona_timer_init(struct device_node *node) arch_timer_rate = freq; } else { pr_err("Kona Timer v1 unable to determine clock-frequency"); - return; + return -EINVAL; } /* Setup IRQ numbers */ @@ -196,6 +190,8 @@ static void __init kona_timer_init(struct device_node *node) kona_timer_clockevents_init(); setup_irq(timers.tmr_irq, &kona_timer_irq); kona_timer_set_next_event((arch_timer_rate / HZ), NULL); + + return 0; } CLOCKSOURCE_OF_DECLARE(brcm_kona, "brcm,kona-timer", kona_timer_init); diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c index 9be6018bd2b8..fbfbdec13b08 100644 --- a/drivers/clocksource/cadence_ttc_timer.c +++ b/drivers/clocksource/cadence_ttc_timer.c @@ -322,22 +322,22 @@ static int ttc_rate_change_clocksource_cb(struct notifier_block *nb, return NOTIFY_DONE; } -static void __init ttc_setup_clocksource(struct clk *clk, void __iomem *base, +static int __init ttc_setup_clocksource(struct clk *clk, void __iomem *base, u32 timer_width) { struct ttc_timer_clocksource *ttccs; int err; ttccs = kzalloc(sizeof(*ttccs), GFP_KERNEL); - if (WARN_ON(!ttccs)) - return; + if (!ttccs) + return -ENOMEM; ttccs->ttc.clk = clk; err = clk_prepare_enable(ttccs->ttc.clk); - if (WARN_ON(err)) { + if (err) { kfree(ttccs); - return; + return err; } ttccs->ttc.freq = clk_get_rate(ttccs->ttc.clk); @@ -345,8 +345,10 @@ static void __init ttc_setup_clocksource(struct clk *clk, void __iomem *base, ttccs->ttc.clk_rate_change_nb.notifier_call = ttc_rate_change_clocksource_cb; ttccs->ttc.clk_rate_change_nb.next = NULL; - if (clk_notifier_register(ttccs->ttc.clk, - &ttccs->ttc.clk_rate_change_nb)) + + err = clk_notifier_register(ttccs->ttc.clk, + &ttccs->ttc.clk_rate_change_nb); + if (err) 
pr_warn("Unable to register clock notifier.\n"); ttccs->ttc.base_addr = base; @@ -368,14 +370,16 @@ static void __init ttc_setup_clocksource(struct clk *clk, void __iomem *base, ttccs->ttc.base_addr + TTC_CNT_CNTRL_OFFSET); err = clocksource_register_hz(&ttccs->cs, ttccs->ttc.freq / PRESCALE); - if (WARN_ON(err)) { + if (err) { kfree(ttccs); - return; + return err; } ttc_sched_clock_val_reg = base + TTC_COUNT_VAL_OFFSET; sched_clock_register(ttc_sched_clock_read, timer_width, ttccs->ttc.freq / PRESCALE); + + return 0; } static int ttc_rate_change_clockevent_cb(struct notifier_block *nb, @@ -401,30 +405,35 @@ static int ttc_rate_change_clockevent_cb(struct notifier_block *nb, } } -static void __init ttc_setup_clockevent(struct clk *clk, - void __iomem *base, u32 irq) +static int __init ttc_setup_clockevent(struct clk *clk, + void __iomem *base, u32 irq) { struct ttc_timer_clockevent *ttcce; int err; ttcce = kzalloc(sizeof(*ttcce), GFP_KERNEL); - if (WARN_ON(!ttcce)) - return; + if (!ttcce) + return -ENOMEM; ttcce->ttc.clk = clk; err = clk_prepare_enable(ttcce->ttc.clk); - if (WARN_ON(err)) { + if (err) { kfree(ttcce); - return; + return err; } ttcce->ttc.clk_rate_change_nb.notifier_call = ttc_rate_change_clockevent_cb; ttcce->ttc.clk_rate_change_nb.next = NULL; - if (clk_notifier_register(ttcce->ttc.clk, - &ttcce->ttc.clk_rate_change_nb)) + + err = clk_notifier_register(ttcce->ttc.clk, + &ttcce->ttc.clk_rate_change_nb); + if (err) { pr_warn("Unable to register clock notifier.\n"); + return err; + } + ttcce->ttc.freq = clk_get_rate(ttcce->ttc.clk); ttcce->ttc.base_addr = base; @@ -451,13 +460,15 @@ static void __init ttc_setup_clockevent(struct clk *clk, err = request_irq(irq, ttc_clock_event_interrupt, IRQF_TIMER, ttcce->ce.name, ttcce); - if (WARN_ON(err)) { + if (err) { kfree(ttcce); - return; + return err; } clockevents_config_and_register(&ttcce->ce, ttcce->ttc.freq / PRESCALE, 1, 0xfffe); + + return 0; } /** @@ -466,17 +477,17 @@ static void __init ttc_setup_clockevent(struct clk *clk, * Initializes the timer hardware and register the clock source and clock event * timers with Linux kernal timer framework */ -static void __init ttc_timer_init(struct device_node *timer) +static int __init ttc_timer_init(struct device_node *timer) { unsigned int irq; void __iomem *timer_baseaddr; struct clk *clk_cs, *clk_ce; static int initialized; - int clksel; + int clksel, ret; u32 timer_width = 16; if (initialized) - return; + return 0; initialized = 1; @@ -488,13 +499,13 @@ static void __init ttc_timer_init(struct device_node *timer) timer_baseaddr = of_iomap(timer, 0); if (!timer_baseaddr) { pr_err("ERROR: invalid timer base address\n"); - BUG(); + return -ENXIO; } irq = irq_of_parse_and_map(timer, 1); if (irq <= 0) { pr_err("ERROR: invalid interrupt number\n"); - BUG(); + return -EINVAL; } of_property_read_u32(timer, "timer-width", &timer_width); @@ -504,7 +515,7 @@ static void __init ttc_timer_init(struct device_node *timer) clk_cs = of_clk_get(timer, clksel); if (IS_ERR(clk_cs)) { pr_err("ERROR: timer input clock not found\n"); - BUG(); + return PTR_ERR(clk_cs); } clksel = readl_relaxed(timer_baseaddr + 4 + TTC_CLK_CNTRL_OFFSET); @@ -512,13 +523,20 @@ static void __init ttc_timer_init(struct device_node *timer) clk_ce = of_clk_get(timer, clksel); if (IS_ERR(clk_ce)) { pr_err("ERROR: timer input clock not found\n"); - BUG(); + return PTR_ERR(clk_ce); } - ttc_setup_clocksource(clk_cs, timer_baseaddr, timer_width); - ttc_setup_clockevent(clk_ce, timer_baseaddr + 4, irq); + ret = 
ttc_setup_clocksource(clk_cs, timer_baseaddr, timer_width); + if (ret) + return ret; + + ret = ttc_setup_clockevent(clk_ce, timer_baseaddr + 4, irq); + if (ret) + return ret; pr_info("%s #0 at %p, irq=%d\n", timer->name, timer_baseaddr, irq); + + return 0; } CLOCKSOURCE_OF_DECLARE(ttc, "cdns,ttc", ttc_timer_init); diff --git a/drivers/clocksource/clksrc-dbx500-prcmu.c b/drivers/clocksource/clksrc-dbx500-prcmu.c index dfad6eb99662..77a365f573d7 100644 --- a/drivers/clocksource/clksrc-dbx500-prcmu.c +++ b/drivers/clocksource/clksrc-dbx500-prcmu.c @@ -64,7 +64,7 @@ static u64 notrace dbx500_prcmu_sched_clock_read(void) #endif -static void __init clksrc_dbx500_prcmu_init(struct device_node *node) +static int __init clksrc_dbx500_prcmu_init(struct device_node *node) { clksrc_dbx500_timer_base = of_iomap(node, 0); @@ -84,7 +84,7 @@ static void __init clksrc_dbx500_prcmu_init(struct device_node *node) #ifdef CONFIG_CLKSRC_DBX500_PRCMU_SCHED_CLOCK sched_clock_register(dbx500_prcmu_sched_clock_read, 32, RATE_32K); #endif - clocksource_register_hz(&clocksource_dbx500_prcmu, RATE_32K); + return clocksource_register_hz(&clocksource_dbx500_prcmu, RATE_32K); } CLOCKSOURCE_OF_DECLARE(dbx500_prcmu, "stericsson,db8500-prcmu-timer-4", clksrc_dbx500_prcmu_init); diff --git a/drivers/clocksource/clksrc-probe.c b/drivers/clocksource/clksrc-probe.c index 7cb6c923a836..bc62be97f0a8 100644 --- a/drivers/clocksource/clksrc-probe.c +++ b/drivers/clocksource/clksrc-probe.c @@ -28,15 +28,23 @@ void __init clocksource_probe(void) { struct device_node *np; const struct of_device_id *match; - of_init_fn_1 init_func; + of_init_fn_1_ret init_func_ret; unsigned clocksources = 0; + int ret; for_each_matching_node_and_match(np, __clksrc_of_table, &match) { if (!of_device_is_available(np)) continue; - init_func = match->data; - init_func(np); + init_func_ret = match->data; + + ret = init_func_ret(np); + if (ret) { + pr_err("Failed to initialize '%s': %d", + of_node_full_name(np), ret); + continue; + } + clocksources++; } diff --git a/drivers/clocksource/clksrc_st_lpc.c b/drivers/clocksource/clksrc_st_lpc.c index 65ec4674416d..03cc49217bb4 100644 --- a/drivers/clocksource/clksrc_st_lpc.c +++ b/drivers/clocksource/clksrc_st_lpc.c @@ -92,7 +92,7 @@ static int __init st_clksrc_setup_clk(struct device_node *np) return 0; } -static void __init st_clksrc_of_register(struct device_node *np) +static int __init st_clksrc_of_register(struct device_node *np) { int ret; uint32_t mode; @@ -100,32 +100,36 @@ static void __init st_clksrc_of_register(struct device_node *np) ret = of_property_read_u32(np, "st,lpc-mode", &mode); if (ret) { pr_err("clksrc-st-lpc: An LPC mode must be provided\n"); - return; + return ret; } /* LPC can either run as a Clocksource or in RTC or WDT mode */ if (mode != ST_LPC_MODE_CLKSRC) - return; + return 0; ddata.base = of_iomap(np, 0); if (!ddata.base) { pr_err("clksrc-st-lpc: Unable to map iomem\n"); - return; + return -ENXIO; } - if (st_clksrc_setup_clk(np)) { + ret = st_clksrc_setup_clk(np); + if (ret) { iounmap(ddata.base); - return; + return ret; } - if (st_clksrc_init()) { + ret = st_clksrc_init(); + if (ret) { clk_disable_unprepare(ddata.clk); clk_put(ddata.clk); iounmap(ddata.base); - return; + return ret; } pr_info("clksrc-st-lpc: clocksource initialised - running @ %luHz\n", clk_get_rate(ddata.clk)); + + return ret; } CLOCKSOURCE_OF_DECLARE(ddata, "st,stih407-lpc", st_clksrc_of_register); diff --git a/drivers/clocksource/clps711x-timer.c b/drivers/clocksource/clps711x-timer.c index 
cdd86e3525bb..84aed78261e4 100644 --- a/drivers/clocksource/clps711x-timer.c +++ b/drivers/clocksource/clps711x-timer.c @@ -104,7 +104,7 @@ void __init clps711x_clksrc_init(void __iomem *tc1_base, void __iomem *tc2_base, } #ifdef CONFIG_CLKSRC_OF -static void __init clps711x_timer_init(struct device_node *np) +static int __init clps711x_timer_init(struct device_node *np) { unsigned int irq = irq_of_parse_and_map(np, 0); struct clk *clock = of_clk_get(np, 0); @@ -112,13 +112,11 @@ static void __init clps711x_timer_init(struct device_node *np) switch (of_alias_get_id(np, "timer")) { case CLPS711X_CLKSRC_CLOCKSOURCE: - BUG_ON(_clps711x_clksrc_init(clock, base)); - break; + return _clps711x_clksrc_init(clock, base); case CLPS711X_CLKSRC_CLOCKEVENT: - BUG_ON(_clps711x_clkevt_init(clock, base, irq)); - break; + return _clps711x_clkevt_init(clock, base, irq); default: - break; + return -EINVAL; } } CLOCKSOURCE_OF_DECLARE(clps711x, "cirrus,clps711x-timer", clps711x_timer_init); diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c index 860843cef572..aee6c0d39a7c 100644 --- a/drivers/clocksource/dw_apb_timer_of.c +++ b/drivers/clocksource/dw_apb_timer_of.c @@ -143,7 +143,7 @@ static struct delay_timer dw_apb_delay_timer = { #endif static int num_called; -static void __init dw_apb_timer_init(struct device_node *timer) +static int __init dw_apb_timer_init(struct device_node *timer) { switch (num_called) { case 0: @@ -164,6 +164,8 @@ static void __init dw_apb_timer_init(struct device_node *timer) } num_called++; + + return 0; } CLOCKSOURCE_OF_DECLARE(pc3x2_timer, "picochip,pc3x2-timer", dw_apb_timer_init); CLOCKSOURCE_OF_DECLARE(apb_timer_osc, "snps,dw-apb-timer-osc", dw_apb_timer_init); diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index be09bc0b5e26..0d18dd4b3bd2 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -232,7 +232,7 @@ static cycles_t exynos4_read_current_timer(void) return exynos4_read_count_32(); } -static void __init exynos4_clocksource_init(void) +static int __init exynos4_clocksource_init(void) { exynos4_mct_frc_start(); @@ -244,6 +244,8 @@ static void __init exynos4_clocksource_init(void) panic("%s: can't register clocksource\n", mct_frc.name); sched_clock_register(exynos4_read_sched_clock, 32, clk_rate); + + return 0; } static void exynos4_mct_comp0_stop(void) @@ -335,12 +337,14 @@ static struct irqaction mct_comp_event_irq = { .dev_id = &mct_comp_device, }; -static void exynos4_clockevent_init(void) +static int exynos4_clockevent_init(void) { mct_comp_device.cpumask = cpumask_of(0); clockevents_config_and_register(&mct_comp_device, clk_rate, 0xf, 0xffffffff); setup_irq(mct_irqs[MCT_G0_IRQ], &mct_comp_event_irq); + + return 0; } static DEFINE_PER_CPU(struct mct_clock_event_device, percpu_mct_tick); @@ -516,7 +520,7 @@ static struct notifier_block exynos4_mct_cpu_nb = { .notifier_call = exynos4_mct_cpu_notify, }; -static void __init exynos4_timer_resources(struct device_node *np, void __iomem *base) +static int __init exynos4_timer_resources(struct device_node *np, void __iomem *base) { int err, cpu; struct mct_clock_event_device *mevt = this_cpu_ptr(&percpu_mct_tick); @@ -572,15 +576,17 @@ static void __init exynos4_timer_resources(struct device_node *np, void __iomem /* Immediately configure the timer on the boot CPU */ exynos4_local_timer_setup(mevt); - return; + return 0; out_irq: free_percpu_irq(mct_irqs[MCT_L0_IRQ], &percpu_mct_tick); + return err; } -static void 
__init mct_init_dt(struct device_node *np, unsigned int int_type) +static int __init mct_init_dt(struct device_node *np, unsigned int int_type) { u32 nr_irqs, i; + int ret; mct_int_type = int_type; @@ -600,18 +606,24 @@ static void __init mct_init_dt(struct device_node *np, unsigned int int_type) for (i = MCT_L0_IRQ; i < nr_irqs; i++) mct_irqs[i] = irq_of_parse_and_map(np, i); - exynos4_timer_resources(np, of_iomap(np, 0)); - exynos4_clocksource_init(); - exynos4_clockevent_init(); + ret = exynos4_timer_resources(np, of_iomap(np, 0)); + if (ret) + return ret; + + ret = exynos4_clocksource_init(); + if (ret) + return ret; + + return exynos4_clockevent_init(); } -static void __init mct_init_spi(struct device_node *np) +static int __init mct_init_spi(struct device_node *np) { return mct_init_dt(np, MCT_INT_SPI); } -static void __init mct_init_ppi(struct device_node *np) +static int __init mct_init_ppi(struct device_node *np) { return mct_init_dt(np, MCT_INT_PPI); } diff --git a/drivers/clocksource/fsl_ftm_timer.c b/drivers/clocksource/fsl_ftm_timer.c index 517e1c7624d4..738515b89073 100644 --- a/drivers/clocksource/fsl_ftm_timer.c +++ b/drivers/clocksource/fsl_ftm_timer.c @@ -316,15 +316,16 @@ static int __init ftm_calc_closest_round_cyc(unsigned long freq) return 0; } -static void __init ftm_timer_init(struct device_node *np) +static int __init ftm_timer_init(struct device_node *np) { unsigned long freq; - int irq; + int ret, irq; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) - return; + return -ENOMEM; + ret = -ENXIO; priv->clkevt_base = of_iomap(np, 0); if (!priv->clkevt_base) { pr_err("ftm: unable to map event timer registers\n"); @@ -337,6 +338,7 @@ static void __init ftm_timer_init(struct device_node *np) goto err; } + ret = -EINVAL; irq = irq_of_parse_and_map(np, 0); if (irq <= 0) { pr_err("ftm: unable to get IRQ from DT, %d\n", irq); @@ -349,18 +351,22 @@ static void __init ftm_timer_init(struct device_node *np) if (!freq) goto err; - if (ftm_calc_closest_round_cyc(freq)) + ret = ftm_calc_closest_round_cyc(freq); + if (ret) goto err; - if (ftm_clocksource_init(freq)) + ret = ftm_clocksource_init(freq); + if (ret) goto err; - if (ftm_clockevent_init(freq, irq)) + ret = ftm_clockevent_init(freq, irq); + if (ret) goto err; - return; + return 0; err: kfree(priv); + return ret; } CLOCKSOURCE_OF_DECLARE(flextimer, "fsl,ftm-timer", ftm_timer_init); diff --git a/drivers/clocksource/h8300_timer16.c b/drivers/clocksource/h8300_timer16.c index 75c44079b345..07d9d5be9054 100644 --- a/drivers/clocksource/h8300_timer16.c +++ b/drivers/clocksource/h8300_timer16.c @@ -126,7 +126,7 @@ static struct timer16_priv timer16_priv = { #define REG_CH 0 #define REG_COMM 1 -static void __init h8300_16timer_init(struct device_node *node) +static int __init h8300_16timer_init(struct device_node *node) { void __iomem *base[2]; int ret, irq; @@ -136,9 +136,10 @@ static void __init h8300_16timer_init(struct device_node *node) clk = of_clk_get(node, 0); if (IS_ERR(clk)) { pr_err("failed to get clock for clocksource\n"); - return; + return PTR_ERR(clk); } + ret = -ENXIO; base[REG_CH] = of_iomap(node, 0); if (!base[REG_CH]) { pr_err("failed to map registers for clocksource\n"); @@ -151,6 +152,7 @@ static void __init h8300_16timer_init(struct device_node *node) goto unmap_ch; } + ret = -EINVAL; irq = irq_of_parse_and_map(node, 0); if (!irq) { pr_err("failed to get irq for clockevent\n"); @@ -174,7 +176,7 @@ static void __init h8300_16timer_init(struct device_node *node) clocksource_register_hz(&timer16_priv.cs, 
clk_get_rate(clk) / 8); - return; + return 0; unmap_comm: iounmap(base[REG_COMM]); @@ -182,6 +184,8 @@ unmap_ch: iounmap(base[REG_CH]); free_clk: clk_put(clk); + return ret; } -CLOCKSOURCE_OF_DECLARE(h8300_16bit, "renesas,16bit-timer", h8300_16timer_init); +CLOCKSOURCE_OF_DECLARE(h8300_16bit, "renesas,16bit-timer", + h8300_16timer_init); diff --git a/drivers/clocksource/h8300_timer8.c b/drivers/clocksource/h8300_timer8.c index c151941e1956..546bb180f5a4 100644 --- a/drivers/clocksource/h8300_timer8.c +++ b/drivers/clocksource/h8300_timer8.c @@ -164,24 +164,26 @@ static struct timer8_priv timer8_priv = { }, }; -static void __init h8300_8timer_init(struct device_node *node) +static int __init h8300_8timer_init(struct device_node *node) { void __iomem *base; - int irq; + int irq, ret; struct clk *clk; clk = of_clk_get(node, 0); if (IS_ERR(clk)) { pr_err("failed to get clock for clockevent\n"); - return; + return PTR_ERR(clk); } + ret = -ENXIO; base = of_iomap(node, 0); if (!base) { pr_err("failed to map registers for clockevent\n"); goto free_clk; } + ret = -EINVAL; irq = irq_of_parse_and_map(node, 0); if (!irq) { pr_err("failed to get irq for clockevent\n"); @@ -205,11 +207,12 @@ static void __init h8300_8timer_init(struct device_node *node) clockevents_config_and_register(&timer8_priv.ced, timer8_priv.rate, 1, 0x0000ffff); - return; + return 0; unmap_reg: iounmap(base); free_clk: clk_put(clk); + return ret; } CLOCKSOURCE_OF_DECLARE(h8300_8bit, "renesas,8bit-timer", h8300_8timer_init); diff --git a/drivers/clocksource/h8300_tpu.c b/drivers/clocksource/h8300_tpu.c index d4c1a287c262..7bdf1991c847 100644 --- a/drivers/clocksource/h8300_tpu.c +++ b/drivers/clocksource/h8300_tpu.c @@ -119,15 +119,16 @@ static struct tpu_priv tpu_priv = { #define CH_L 0 #define CH_H 1 -static void __init h8300_tpu_init(struct device_node *node) +static int __init h8300_tpu_init(struct device_node *node) { void __iomem *base[2]; struct clk *clk; + int ret = -ENXIO; clk = of_clk_get(node, 0); if (IS_ERR(clk)) { pr_err("failed to get clock for clocksource\n"); - return; + return PTR_ERR(clk); } base[CH_L] = of_iomap(node, CH_L); @@ -144,14 +145,13 @@ static void __init h8300_tpu_init(struct device_node *node) tpu_priv.mapbase1 = base[CH_L]; tpu_priv.mapbase2 = base[CH_H]; - clocksource_register_hz(&tpu_priv.cs, clk_get_rate(clk) / 64); - - return; + return clocksource_register_hz(&tpu_priv.cs, clk_get_rate(clk) / 64); unmap_L: iounmap(base[CH_H]); free_clk: clk_put(clk); + return ret; } CLOCKSOURCE_OF_DECLARE(h8300_tpu, "renesas,tpu", h8300_tpu_init); diff --git a/drivers/clocksource/meson6_timer.c b/drivers/clocksource/meson6_timer.c index 1fa22c4d2d49..52af591a9fc7 100644 --- a/drivers/clocksource/meson6_timer.c +++ b/drivers/clocksource/meson6_timer.c @@ -126,18 +126,22 @@ static struct irqaction meson6_timer_irq = { .dev_id = &meson6_clockevent, }; -static void __init meson6_timer_init(struct device_node *node) +static int __init meson6_timer_init(struct device_node *node) { u32 val; int ret, irq; timer_base = of_io_request_and_map(node, 0, "meson6-timer"); - if (IS_ERR(timer_base)) - panic("Can't map registers"); + if (IS_ERR(timer_base)) { + pr_err("Can't map registers"); + return -ENXIO; + } irq = irq_of_parse_and_map(node, 0); - if (irq <= 0) - panic("Can't parse IRQ"); + if (irq <= 0) { + pr_err("Can't parse IRQ"); + return -EINVAL; + } /* Set 1us for timer E */ val = readl(timer_base + TIMER_ISA_MUX); @@ -158,14 +162,17 @@
meson6_clkevt_time_stop(CED_ID); ret = setup_irq(irq, &meson6_timer_irq); - if (ret) + if (ret) { pr_warn("failed to setup irq %d\n", irq); + return ret; + } meson6_clockevent.cpumask = cpu_possible_mask; meson6_clockevent.irq = irq; clockevents_config_and_register(&meson6_clockevent, USEC_PER_SEC, 1, 0xfffe); + return 0; } CLOCKSOURCE_OF_DECLARE(meson6, "amlogic,meson6-timer", meson6_timer_init); diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c index 89d3e4d7900c..1572c7a778ab 100644 --- a/drivers/clocksource/mips-gic-timer.c +++ b/drivers/clocksource/mips-gic-timer.c @@ -146,7 +146,7 @@ static struct clocksource gic_clocksource = { .archdata = { .vdso_clock_mode = VDSO_CLOCK_GIC }, }; -static void __init __gic_clocksource_init(void) +static int __init __gic_clocksource_init(void) { int ret; @@ -159,6 +159,8 @@ static void __init __gic_clocksource_init(void) ret = clocksource_register_hz(&gic_clocksource, gic_frequency); if (ret < 0) pr_warn("GIC: Unable to register clocksource\n"); + + return ret; } void __init gic_clocksource_init(unsigned int frequency) @@ -179,31 +181,35 @@ void __init gic_clocksource_init(unsigned int frequency) -static void __init gic_clocksource_of_init(struct device_node *node) +static int __init gic_clocksource_of_init(struct device_node *node) { struct clk *clk; int ret; - if (WARN_ON(!gic_present || !node->parent || - !of_device_is_compatible(node->parent, "mti,gic"))) - return; + if (!gic_present || !node->parent || + !of_device_is_compatible(node->parent, "mti,gic")) { + pr_warn("No DT definition for the mips gic driver"); + return -ENXIO; + } clk = of_clk_get(node, 0); if (!IS_ERR(clk)) { - if (clk_prepare_enable(clk) < 0) { + ret = clk_prepare_enable(clk); + if (ret < 0) { pr_err("GIC failed to enable clock\n"); clk_put(clk); - return; + return ret; } gic_frequency = clk_get_rate(clk); } else if (of_property_read_u32(node, "clock-frequency", &gic_frequency)) { pr_err("GIC frequency not specified.\n"); - return; + return -EINVAL; } gic_timer_irq = irq_of_parse_and_map(node, 0); if (!gic_timer_irq) { pr_err("GIC timer IRQ not specified.\n"); - return; + return -EINVAL; } - __gic_clocksource_init(); + ret = __gic_clocksource_init(); + if (ret) + return ret; ret = gic_clockevent_init(); if (!ret && !IS_ERR(clk)) { @@ -213,6 +219,8 @@ static void __init gic_clocksource_of_init(struct device_node *node) /* And finally start the counter */ gic_start_count(); + + return 0; } CLOCKSOURCE_OF_DECLARE(mips_gic_timer, "mti,gic-timer", gic_clocksource_of_init); diff --git a/drivers/clocksource/moxart_timer.c b/drivers/clocksource/moxart_timer.c index 19857af651c1..841454417acd 100644 --- a/drivers/clocksource/moxart_timer.c +++ b/drivers/clocksource/moxart_timer.c @@ -119,34 +119,45 @@ static struct irqaction moxart_timer_irq = { .dev_id = &moxart_clockevent, }; -static void __init moxart_timer_init(struct device_node *node) +static int __init moxart_timer_init(struct device_node *node) { int ret, irq; unsigned long pclk; struct clk *clk; base = of_iomap(node, 0); - if (!base) - panic("%s: of_iomap failed\n", node->full_name); + if (!base) { + pr_err("%s: of_iomap failed\n", node->full_name); + return -ENXIO; + } irq = irq_of_parse_and_map(node, 0); - if (irq <= 0) - panic("%s: irq_of_parse_and_map failed\n", node->full_name); + if (irq <= 0) { + pr_err("%s: irq_of_parse_and_map failed\n", node->full_name); + return -EINVAL; + } ret = setup_irq(irq, &moxart_timer_irq); - if (ret) - panic("%s: setup_irq failed\n", node->full_name); + if (ret) { + pr_err("%s: setup_irq failed\n", node->full_name); + return ret; + } clk = of_clk_get(node, 0); - if (IS_ERR(clk)) - panic("%s: of_clk_get failed\n",
node->full_name); + if (IS_ERR(clk)) { + pr_err("%s: of_clk_get failed\n", node->full_name); + return PTR_ERR(clk); + } pclk = clk_get_rate(clk); - if (clocksource_mmio_init(base + TIMER2_BASE + REG_COUNT, - "moxart_timer", pclk, 200, 32, - clocksource_mmio_readl_down)) - panic("%s: clocksource_mmio_init failed\n", node->full_name); + ret = clocksource_mmio_init(base + TIMER2_BASE + REG_COUNT, + "moxart_timer", pclk, 200, 32, + clocksource_mmio_readl_down); + if (ret) { + pr_err("%s: clocksource_mmio_init failed\n", node->full_name); + return ret; + } clock_count_per_tick = DIV_ROUND_CLOSEST(pclk, HZ); @@ -164,5 +175,7 @@ static void __init moxart_timer_init(struct device_node *node) */ clockevents_config_and_register(&moxart_clockevent, pclk, 0x4, 0xfffffffe); + + return 0; } CLOCKSOURCE_OF_DECLARE(moxart, "moxa,moxart-timer", moxart_timer_init); diff --git a/drivers/clocksource/mps2-timer.c b/drivers/clocksource/mps2-timer.c index 3d33a5e23dee..3e4431ed9aa9 100644 --- a/drivers/clocksource/mps2-timer.c +++ b/drivers/clocksource/mps2-timer.c @@ -250,7 +250,7 @@ out: return ret; } -static void __init mps2_timer_init(struct device_node *np) +static int __init mps2_timer_init(struct device_node *np) { static int has_clocksource, has_clockevent; int ret; @@ -259,7 +259,7 @@ static void __init mps2_timer_init(struct device_node *np) ret = mps2_clocksource_init(np); if (!ret) { has_clocksource = 1; - return; + return 0; } } @@ -267,9 +267,11 @@ static void __init mps2_timer_init(struct device_node *np) ret = mps2_clockevent_init(np); if (!ret) { has_clockevent = 1; - return; + return 0; } } + + return 0; } CLOCKSOURCE_OF_DECLARE(mps2_timer, "arm,mps2-timer", mps2_timer_init); diff --git a/drivers/clocksource/mtk_timer.c b/drivers/clocksource/mtk_timer.c index 7e583f8ea5f4..90659493c59c 100644 --- a/drivers/clocksource/mtk_timer.c +++ b/drivers/clocksource/mtk_timer.c @@ -181,7 +181,7 @@ static void mtk_timer_enable_irq(struct mtk_clock_event_device *evt, u8 timer) evt->gpt_base + GPT_IRQ_EN_REG); } -static void __init mtk_timer_init(struct device_node *node) +static int __init mtk_timer_init(struct device_node *node) { struct mtk_clock_event_device *evt; struct resource res; @@ -190,7 +190,7 @@ static void __init mtk_timer_init(struct device_node *node) evt = kzalloc(sizeof(*evt), GFP_KERNEL); if (!evt) - return; + return -ENOMEM; evt->dev.name = "mtk_tick"; evt->dev.rating = 300; @@ -248,7 +248,7 @@ static void __init mtk_timer_init(struct device_node *node) mtk_timer_enable_irq(evt, GPT_CLK_EVT); - return; + return 0; err_clk_disable: clk_disable_unprepare(clk); @@ -262,5 +262,7 @@ err_mem: release_mem_region(res.start, resource_size(&res)); err_kzalloc: kfree(evt); + + return -EINVAL; } CLOCKSOURCE_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_timer_init); diff --git a/drivers/clocksource/mxs_timer.c b/drivers/clocksource/mxs_timer.c index f5ce2961c0d6..0ba0a913b41d 100644 --- a/drivers/clocksource/mxs_timer.c +++ b/drivers/clocksource/mxs_timer.c @@ -31,8 +31,6 @@ #include #include -#include - /* * There are 2 versions of the timrot on Freescale MXS-based SoCs. 
* The v1 on MX23 only gets 16 bits counter, while v2 on MX28 @@ -226,10 +224,10 @@ static int __init mxs_clocksource_init(struct clk *timer_clk) return 0; } -static void __init mxs_timer_init(struct device_node *np) +static int __init mxs_timer_init(struct device_node *np) { struct clk *timer_clk; - int irq; + int irq, ret; mxs_timrot_base = of_iomap(np, 0); WARN_ON(!mxs_timrot_base); @@ -237,10 +235,12 @@ static void __init mxs_timer_init(struct device_node *np) timer_clk = of_clk_get(np, 0); if (IS_ERR(timer_clk)) { pr_err("%s: failed to get clk\n", __func__); - return; + return PTR_ERR(timer_clk); } - clk_prepare_enable(timer_clk); + ret = clk_prepare_enable(timer_clk); + if (ret) + return ret; /* * Initialize timers to a known state @@ -278,11 +278,19 @@ static void __init mxs_timer_init(struct device_node *np) mxs_timrot_base + HW_TIMROT_FIXED_COUNTn(1)); /* init and register the timer to the framework */ - mxs_clocksource_init(timer_clk); - mxs_clockevent_init(timer_clk); + ret = mxs_clocksource_init(timer_clk); + if (ret) + return ret; + + ret = mxs_clockevent_init(timer_clk); + if (ret) + return ret; /* Make irqs happen */ irq = irq_of_parse_and_map(np, 0); - setup_irq(irq, &mxs_timer_irq); + if (irq <= 0) + return -EINVAL; + + return setup_irq(irq, &mxs_timer_irq); } CLOCKSOURCE_OF_DECLARE(mxs, "fsl,timrot", mxs_timer_init); diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c index bc8dd443c727..3c124d1ca600 100644 --- a/drivers/clocksource/nomadik-mtu.c +++ b/drivers/clocksource/nomadik-mtu.c @@ -193,10 +193,11 @@ static struct irqaction nmdk_timer_irq = { .dev_id = &nmdk_clkevt, }; -static void __init nmdk_timer_init(void __iomem *base, int irq, +static int __init nmdk_timer_init(void __iomem *base, int irq, struct clk *pclk, struct clk *clk) { unsigned long rate; + int ret; mtu_base = base; @@ -226,10 +227,12 @@ static void __init nmdk_timer_init(void __iomem *base, int irq, /* Timer 0 is the free running clocksource */ nmdk_clksrc_reset(); - if (clocksource_mmio_init(mtu_base + MTU_VAL(0), "mtu_0", - rate, 200, 32, clocksource_mmio_readl_down)) - pr_err("timer: failed to initialize clock source %s\n", - "mtu_0"); + ret = clocksource_mmio_init(mtu_base + MTU_VAL(0), "mtu_0", + rate, 200, 32, clocksource_mmio_readl_down); + if (ret) { + pr_err("timer: failed to initialize clock source %s\n", "mtu_0"); + return ret; + } #ifdef CONFIG_CLKSRC_NOMADIK_MTU_SCHED_CLOCK sched_clock_register(nomadik_read_sched_clock, 32, rate); @@ -244,9 +247,11 @@ static void __init nmdk_timer_init(void __iomem *base, int irq, mtu_delay_timer.read_current_timer = &nmdk_timer_read_current_timer; mtu_delay_timer.freq = rate; register_current_timer_delay(&mtu_delay_timer); + + return 0; } -static void __init nmdk_timer_of_init(struct device_node *node) +static int __init nmdk_timer_of_init(struct device_node *node) { struct clk *pclk; struct clk *clk; @@ -254,22 +259,30 @@ static void __init nmdk_timer_of_init(struct device_node *node) int irq; base = of_iomap(node, 0); - if (!base) - panic("Can't remap registers"); + if (!base) { + pr_err("Can't remap registers"); + return -ENXIO; + } pclk = of_clk_get_by_name(node, "apb_pclk"); - if (IS_ERR(pclk)) - panic("could not get apb_pclk"); + if (IS_ERR(pclk)) { + pr_err("could not get apb_pclk"); + return PTR_ERR(pclk); + } clk = of_clk_get_by_name(node, "timclk"); - if (IS_ERR(clk)) - panic("could not get timclk"); + if (IS_ERR(clk)) { + pr_err("could not get timclk"); + return PTR_ERR(clk); + } irq = irq_of_parse_and_map(node, 
0); - if (irq <= 0) - panic("Can't parse IRQ"); + if (irq <= 0) { + pr_err("Can't parse IRQ"); + return -EINVAL; + } - nmdk_timer_init(base, irq, pclk, clk); + return nmdk_timer_init(base, irq, pclk, clk); } CLOCKSOURCE_OF_DECLARE(nomadik_mtu, "st,nomadik-mtu", nmdk_timer_of_init); diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c index 45b6a4999713..937e10b84d58 100644 --- a/drivers/clocksource/pxa_timer.c +++ b/drivers/clocksource/pxa_timer.c @@ -150,8 +150,10 @@ static struct irqaction pxa_ost0_irq = { .dev_id = &ckevt_pxa_osmr0, }; -static void __init pxa_timer_common_init(int irq, unsigned long clock_tick_rate) +static int __init pxa_timer_common_init(int irq, unsigned long clock_tick_rate) { + int ret; + timer_writel(0, OIER); timer_writel(OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3, OSSR); @@ -159,39 +161,57 @@ static void __init pxa_timer_common_init(int irq, unsigned long clock_tick_rate) ckevt_pxa_osmr0.cpumask = cpumask_of(0); - setup_irq(irq, &pxa_ost0_irq); + ret = setup_irq(irq, &pxa_ost0_irq); + if (ret) { + pr_err("Failed to setup irq"); + return ret; + } + + ret = clocksource_mmio_init(timer_base + OSCR, "oscr0", clock_tick_rate, 200, + 32, clocksource_mmio_readl_up); + if (ret) { + pr_err("Failed to init clocksource"); + return ret; + } - clocksource_mmio_init(timer_base + OSCR, "oscr0", clock_tick_rate, 200, - 32, clocksource_mmio_readl_up); clockevents_config_and_register(&ckevt_pxa_osmr0, clock_tick_rate, MIN_OSCR_DELTA * 2, 0x7fffffff); + + return 0; } -static void __init pxa_timer_dt_init(struct device_node *np) +static int __init pxa_timer_dt_init(struct device_node *np) { struct clk *clk; - int irq; + int irq, ret; /* timer registers are shared with watchdog timer */ timer_base = of_iomap(np, 0); - if (!timer_base) - panic("%s: unable to map resource\n", np->name); + if (!timer_base) { + pr_err("%s: unable to map resource\n", np->name); + return -ENXIO; + } clk = of_clk_get(np, 0); if (IS_ERR(clk)) { pr_crit("%s: unable to get clk\n", np->name); - return; + return PTR_ERR(clk); + } + + ret = clk_prepare_enable(clk); + if (ret) { + pr_crit("Failed to prepare clock"); + return ret; } - clk_prepare_enable(clk); /* we are only interested in OS-timer0 irq */ irq = irq_of_parse_and_map(np, 0); if (irq <= 0) { pr_crit("%s: unable to parse OS-timer0 irq\n", np->name); - return; + return -EINVAL; } - pxa_timer_common_init(irq, clk_get_rate(clk)); + return pxa_timer_common_init(irq, clk_get_rate(clk)); } CLOCKSOURCE_OF_DECLARE(pxa_timer, "marvell,pxa-timer", pxa_timer_dt_init); diff --git a/drivers/clocksource/qcom-timer.c b/drivers/clocksource/qcom-timer.c index f8e09f923651..662576339049 100644 --- a/drivers/clocksource/qcom-timer.c +++ b/drivers/clocksource/qcom-timer.c @@ -178,7 +178,7 @@ static struct delay_timer msm_delay_timer = { .read_current_timer = msm_read_current_timer, }; -static void __init msm_timer_init(u32 dgt_hz, int sched_bits, int irq, +static int __init msm_timer_init(u32 dgt_hz, int sched_bits, int irq, bool percpu) { struct clocksource *cs = &msm_clocksource; @@ -218,12 +218,14 @@ err: sched_clock_register(msm_sched_clock_read, sched_bits, dgt_hz); msm_delay_timer.freq = dgt_hz; register_current_timer_delay(&msm_delay_timer); + + return res; } -static void __init msm_dt_timer_init(struct device_node *np) +static int __init msm_dt_timer_init(struct device_node *np) { u32 freq; - int irq; + int irq, ret; struct resource res; u32 percpu_offset; void __iomem *base; @@ -232,34 +234,35 @@ static void __init msm_dt_timer_init(struct 
device_node *np) base = of_iomap(np, 0); if (!base) { pr_err("Failed to map event base\n"); - return; + return -ENXIO; } /* We use GPT0 for the clockevent */ irq = irq_of_parse_and_map(np, 1); if (irq <= 0) { pr_err("Can't get irq\n"); - return; + return -EINVAL; } /* We use CPU0's DGT for the clocksource */ if (of_property_read_u32(np, "cpu-offset", &percpu_offset)) percpu_offset = 0; - if (of_address_to_resource(np, 0, &res)) { + ret = of_address_to_resource(np, 0, &res); + if (ret) { pr_err("Failed to parse DGT resource\n"); - return; + return ret; } cpu0_base = ioremap(res.start + percpu_offset, resource_size(&res)); if (!cpu0_base) { pr_err("Failed to map source base\n"); - return; + return -EINVAL; } if (of_property_read_u32(np, "clock-frequency", &freq)) { pr_err("Unknown frequency\n"); - return; + return -EINVAL; } event_base = base + 0x4; @@ -268,7 +271,7 @@ static void __init msm_dt_timer_init(struct device_node *np) freq /= 4; writel_relaxed(DGT_CLK_CTL_DIV_4, source_base + DGT_CLK_CTL); - msm_timer_init(freq, 32, irq, !!percpu_offset); + return msm_timer_init(freq, 32, irq, !!percpu_offset); } CLOCKSOURCE_OF_DECLARE(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init); CLOCKSOURCE_OF_DECLARE(scss_timer, "qcom,scss-timer", msm_dt_timer_init); diff --git a/drivers/clocksource/rockchip_timer.c b/drivers/clocksource/rockchip_timer.c index b991b288c803..23e267acba25 100644 --- a/drivers/clocksource/rockchip_timer.c +++ b/drivers/clocksource/rockchip_timer.c @@ -19,7 +19,8 @@ #define TIMER_LOAD_COUNT0 0x00 #define TIMER_LOAD_COUNT1 0x04 -#define TIMER_CONTROL_REG 0x10 +#define TIMER_CONTROL_REG3288 0x10 +#define TIMER_CONTROL_REG3399 0x1c #define TIMER_INT_STATUS 0x18 #define TIMER_DISABLE 0x0 @@ -31,6 +32,7 @@ struct bc_timer { struct clock_event_device ce; void __iomem *base; + void __iomem *ctrl; u32 freq; }; @@ -46,15 +48,20 @@ static inline void __iomem *rk_base(struct clock_event_device *ce) return rk_timer(ce)->base; } +static inline void __iomem *rk_ctrl(struct clock_event_device *ce) +{ + return rk_timer(ce)->ctrl; +} + static inline void rk_timer_disable(struct clock_event_device *ce) { - writel_relaxed(TIMER_DISABLE, rk_base(ce) + TIMER_CONTROL_REG); + writel_relaxed(TIMER_DISABLE, rk_ctrl(ce)); } static inline void rk_timer_enable(struct clock_event_device *ce, u32 flags) { writel_relaxed(TIMER_ENABLE | TIMER_INT_UNMASK | flags, - rk_base(ce) + TIMER_CONTROL_REG); + rk_ctrl(ce)); } static void rk_timer_update_counter(unsigned long cycles, @@ -106,37 +113,42 @@ static irqreturn_t rk_timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void __init rk_timer_init(struct device_node *np) +static int __init rk_timer_init(struct device_node *np, u32 ctrl_reg) { struct clock_event_device *ce = &bc_timer.ce; struct clk *timer_clk; struct clk *pclk; - int ret, irq; + int ret = -EINVAL, irq; bc_timer.base = of_iomap(np, 0); if (!bc_timer.base) { pr_err("Failed to get base address for '%s'\n", TIMER_NAME); - return; + return -ENXIO; } + bc_timer.ctrl = bc_timer.base + ctrl_reg; pclk = of_clk_get_by_name(np, "pclk"); if (IS_ERR(pclk)) { + ret = PTR_ERR(pclk); pr_err("Failed to get pclk for '%s'\n", TIMER_NAME); goto out_unmap; } - if (clk_prepare_enable(pclk)) { + ret = clk_prepare_enable(pclk); + if (ret) { pr_err("Failed to enable pclk for '%s'\n", TIMER_NAME); goto out_unmap; } timer_clk = of_clk_get_by_name(np, "timer"); if (IS_ERR(timer_clk)) { + ret = PTR_ERR(timer_clk); pr_err("Failed to get timer clock for '%s'\n", TIMER_NAME); goto out_timer_clk; } - if 
(clk_prepare_enable(timer_clk)) { + ret = clk_prepare_enable(timer_clk); + if (ret) { pr_err("Failed to enable timer clock\n"); goto out_timer_clk; } @@ -145,17 +157,19 @@ static void __init rk_timer_init(struct device_node *np) irq = irq_of_parse_and_map(np, 0); if (!irq) { + ret = -EINVAL; pr_err("Failed to map interrupts for '%s'\n", TIMER_NAME); goto out_irq; } ce->name = TIMER_NAME; - ce->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; + ce->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_DYNIRQ; ce->set_next_event = rk_timer_set_next_event; ce->set_state_shutdown = rk_timer_shutdown; ce->set_state_periodic = rk_timer_set_periodic; ce->irq = irq; - ce->cpumask = cpumask_of(0); + ce->cpumask = cpu_possible_mask; ce->rating = 250; rk_timer_interrupt_clear(ce); @@ -169,7 +183,7 @@ static void __init rk_timer_init(struct device_node *np) clockevents_config_and_register(ce, bc_timer.freq, 1, UINT_MAX); - return; + return 0; out_irq: clk_disable_unprepare(timer_clk); @@ -177,6 +191,21 @@ out_timer_clk: clk_disable_unprepare(pclk); out_unmap: iounmap(bc_timer.base); + + return ret; +} + +static int __init rk3288_timer_init(struct device_node *np) +{ + return rk_timer_init(np, TIMER_CONTROL_REG3288); +} + +static int __init rk3399_timer_init(struct device_node *np) +{ + return rk_timer_init(np, TIMER_CONTROL_REG3399); } -CLOCKSOURCE_OF_DECLARE(rk_timer, "rockchip,rk3288-timer", rk_timer_init); +CLOCKSOURCE_OF_DECLARE(rk3288_timer, "rockchip,rk3288-timer", + rk3288_timer_init); +CLOCKSOURCE_OF_DECLARE(rk3399_timer, "rockchip,rk3399-timer", + rk3399_timer_init); diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c index 9502bc4c3f6d..54565bd0093b 100644 --- a/drivers/clocksource/samsung_pwm_timer.c +++ b/drivers/clocksource/samsung_pwm_timer.c @@ -130,9 +130,9 @@ static void samsung_time_stop(unsigned int channel) spin_lock_irqsave(&samsung_pwm_lock, flags); - tcon = __raw_readl(pwm.base + REG_TCON); + tcon = readl_relaxed(pwm.base + REG_TCON); tcon &= ~TCON_START(channel); - __raw_writel(tcon, pwm.base + REG_TCON); + writel_relaxed(tcon, pwm.base + REG_TCON); spin_unlock_irqrestore(&samsung_pwm_lock, flags); } @@ -148,14 +148,14 @@ static void samsung_time_setup(unsigned int channel, unsigned long tcnt) spin_lock_irqsave(&samsung_pwm_lock, flags); - tcon = __raw_readl(pwm.base + REG_TCON); + tcon = readl_relaxed(pwm.base + REG_TCON); tcon &= ~(TCON_START(tcon_chan) | TCON_AUTORELOAD(tcon_chan)); tcon |= TCON_MANUALUPDATE(tcon_chan); - __raw_writel(tcnt, pwm.base + REG_TCNTB(channel)); - __raw_writel(tcnt, pwm.base + REG_TCMPB(channel)); - __raw_writel(tcon, pwm.base + REG_TCON); + writel_relaxed(tcnt, pwm.base + REG_TCNTB(channel)); + writel_relaxed(tcnt, pwm.base + REG_TCMPB(channel)); + writel_relaxed(tcon, pwm.base + REG_TCON); spin_unlock_irqrestore(&samsung_pwm_lock, flags); } @@ -170,7 +170,7 @@ static void samsung_time_start(unsigned int channel, bool periodic) spin_lock_irqsave(&samsung_pwm_lock, flags); - tcon = __raw_readl(pwm.base + REG_TCON); + tcon = readl_relaxed(pwm.base + REG_TCON); tcon &= ~TCON_MANUALUPDATE(channel); tcon |= TCON_START(channel); @@ -180,7 +180,7 @@ static void samsung_time_start(unsigned int channel, bool periodic) else tcon &= ~TCON_AUTORELOAD(channel); - __raw_writel(tcon, pwm.base + REG_TCON); + writel_relaxed(tcon, pwm.base + REG_TCON); spin_unlock_irqrestore(&samsung_pwm_lock, flags); } @@ -333,11 +333,10 @@ static u64 notrace samsung_read_sched_clock(void) return 
samsung_clocksource_read(NULL); } -static void __init samsung_clocksource_init(void) +static int __init samsung_clocksource_init(void) { unsigned long pclk; unsigned long clock_rate; - int ret; pclk = clk_get_rate(pwm.timerclk); @@ -358,9 +357,7 @@ static void __init samsung_clocksource_init(void) pwm.variant.bits, clock_rate); samsung_clocksource.mask = CLOCKSOURCE_MASK(pwm.variant.bits); - ret = clocksource_register_hz(&samsung_clocksource, clock_rate); - if (ret) - panic("samsung_clocksource_timer: can't register clocksource\n"); + return clocksource_register_hz(&samsung_clocksource, clock_rate); } static void __init samsung_timer_resources(void) @@ -380,26 +377,31 @@ static void __init samsung_timer_resources(void) /* * PWM master driver */ -static void __init _samsung_pwm_clocksource_init(void) +static int __init _samsung_pwm_clocksource_init(void) { u8 mask; int channel; mask = ~pwm.variant.output_mask & ((1 << SAMSUNG_PWM_NUM) - 1); channel = fls(mask) - 1; - if (channel < 0) - panic("failed to find PWM channel for clocksource"); + if (channel < 0) { + pr_crit("failed to find PWM channel for clocksource"); + return -EINVAL; + } pwm.source_id = channel; mask &= ~(1 << channel); channel = fls(mask) - 1; - if (channel < 0) - panic("failed to find PWM channel for clock event"); + if (channel < 0) { + pr_crit("failed to find PWM channel for clock event"); + return -EINVAL; + } pwm.event_id = channel; samsung_timer_resources(); samsung_clockevent_init(); - samsung_clocksource_init(); + + return samsung_clocksource_init(); } void __init samsung_pwm_clocksource_init(void __iomem *base, @@ -417,8 +419,8 @@ void __init samsung_pwm_clocksource_init(void __iomem *base, } #ifdef CONFIG_CLKSRC_OF -static void __init samsung_pwm_alloc(struct device_node *np, - const struct samsung_pwm_variant *variant) +static int __init samsung_pwm_alloc(struct device_node *np, + const struct samsung_pwm_variant *variant) { struct property *prop; const __be32 *cur; @@ -441,14 +443,16 @@ static void __init samsung_pwm_alloc(struct device_node *np, pwm.base = of_iomap(np, 0); if (!pwm.base) { pr_err("%s: failed to map PWM registers\n", __func__); - return; + return -ENXIO; } pwm.timerclk = of_clk_get_by_name(np, "timers"); - if (IS_ERR(pwm.timerclk)) - panic("failed to get timers clock for timer"); + if (IS_ERR(pwm.timerclk)) { + pr_crit("failed to get timers clock for timer"); + return PTR_ERR(pwm.timerclk); + } - _samsung_pwm_clocksource_init(); + return _samsung_pwm_clocksource_init(); } static const struct samsung_pwm_variant s3c24xx_variant = { @@ -458,9 +462,9 @@ static const struct samsung_pwm_variant s3c24xx_variant = { .tclk_mask = (1 << 4), }; -static void __init s3c2410_pwm_clocksource_init(struct device_node *np) +static int __init s3c2410_pwm_clocksource_init(struct device_node *np) { - samsung_pwm_alloc(np, &s3c24xx_variant); + return samsung_pwm_alloc(np, &s3c24xx_variant); } CLOCKSOURCE_OF_DECLARE(s3c2410_pwm, "samsung,s3c2410-pwm", s3c2410_pwm_clocksource_init); @@ -471,9 +475,9 @@ static const struct samsung_pwm_variant s3c64xx_variant = { .tclk_mask = (1 << 7) | (1 << 6) | (1 << 5), }; -static void __init s3c64xx_pwm_clocksource_init(struct device_node *np) +static int __init s3c64xx_pwm_clocksource_init(struct device_node *np) { - samsung_pwm_alloc(np, &s3c64xx_variant); + return samsung_pwm_alloc(np, &s3c64xx_variant); } CLOCKSOURCE_OF_DECLARE(s3c6400_pwm, "samsung,s3c6400-pwm", s3c64xx_pwm_clocksource_init); @@ -484,9 +488,9 @@ static const struct samsung_pwm_variant s5p64x0_variant = { 
.tclk_mask = 0, }; -static void __init s5p64x0_pwm_clocksource_init(struct device_node *np) +static int __init s5p64x0_pwm_clocksource_init(struct device_node *np) { - samsung_pwm_alloc(np, &s5p64x0_variant); + return samsung_pwm_alloc(np, &s5p64x0_variant); } CLOCKSOURCE_OF_DECLARE(s5p6440_pwm, "samsung,s5p6440-pwm", s5p64x0_pwm_clocksource_init); @@ -497,9 +501,9 @@ static const struct samsung_pwm_variant s5p_variant = { .tclk_mask = (1 << 5), }; -static void __init s5p_pwm_clocksource_init(struct device_node *np) +static int __init s5p_pwm_clocksource_init(struct device_node *np) { - samsung_pwm_alloc(np, &s5p_variant); + return samsung_pwm_alloc(np, &s5p_variant); } CLOCKSOURCE_OF_DECLARE(s5pc100_pwm, "samsung,s5pc100-pwm", s5p_pwm_clocksource_init); #endif diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index 6f3719d73390..97669ee4df2a 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -146,7 +146,7 @@ static u64 notrace sun4i_timer_sched_read(void) return ~readl(timer_base + TIMER_CNTVAL_REG(1)); } -static void __init sun4i_timer_init(struct device_node *node) +static int __init sun4i_timer_init(struct device_node *node) { unsigned long rate = 0; struct clk *clk; @@ -154,17 +154,28 @@ static void __init sun4i_timer_init(struct device_node *node) u32 val; timer_base = of_iomap(node, 0); - if (!timer_base) - panic("Can't map registers"); + if (!timer_base) { + pr_crit("Can't map registers"); + return -ENXIO; + } irq = irq_of_parse_and_map(node, 0); - if (irq <= 0) - panic("Can't parse IRQ"); + if (irq <= 0) { + pr_crit("Can't parse IRQ"); + return -EINVAL; + } clk = of_clk_get(node, 0); - if (IS_ERR(clk)) - panic("Can't get timer clock"); - clk_prepare_enable(clk); + if (IS_ERR(clk)) { + pr_crit("Can't get timer clock"); + return PTR_ERR(clk); + } + + ret = clk_prepare_enable(clk); + if (ret) { + pr_err("Failed to prepare clock"); + return ret; + } rate = clk_get_rate(clk); @@ -182,8 +193,12 @@ static void __init sun4i_timer_init(struct device_node *node) of_machine_is_compatible("allwinner,sun5i-a10s")) sched_clock_register(sun4i_timer_sched_read, 32, rate); - clocksource_mmio_init(timer_base + TIMER_CNTVAL_REG(1), node->name, - rate, 350, 32, clocksource_mmio_readl_down); + ret = clocksource_mmio_init(timer_base + TIMER_CNTVAL_REG(1), node->name, + rate, 350, 32, clocksource_mmio_readl_down); + if (ret) { + pr_err("Failed to register clocksource"); + return ret; + } ticks_per_jiffy = DIV_ROUND_UP(rate, HZ); @@ -200,12 +215,16 @@ static void __init sun4i_timer_init(struct device_node *node) TIMER_SYNC_TICKS, 0xffffffff); ret = setup_irq(irq, &sun4i_timer_irq); - if (ret) - pr_warn("failed to setup irq %d\n", irq); + if (ret) { + pr_err("failed to setup irq %d\n", irq); + return ret; + } /* Enable timer0 interrupt */ val = readl(timer_base + TIMER_IRQ_EN_REG); writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG); + + return ret; } CLOCKSOURCE_OF_DECLARE(sun4i, "allwinner,sun4i-a10-timer", sun4i_timer_init); diff --git a/drivers/clocksource/tango_xtal.c b/drivers/clocksource/tango_xtal.c index c407c47a3232..12fcef8cf2d3 100644 --- a/drivers/clocksource/tango_xtal.c +++ b/drivers/clocksource/tango_xtal.c @@ -19,7 +19,7 @@ static u64 notrace read_sched_clock(void) return read_xtal_counter(); } -static void __init tango_clocksource_init(struct device_node *np) +static int __init tango_clocksource_init(struct device_node *np) { struct clk *clk; int xtal_freq, ret; @@ -27,13 +27,13 @@ static void __init 
tango_clocksource_init(struct device_node *np) xtal_in_cnt = of_iomap(np, 0); if (xtal_in_cnt == NULL) { pr_err("%s: invalid address\n", np->full_name); - return; + return -ENXIO; } clk = of_clk_get(np, 0); if (IS_ERR(clk)) { pr_err("%s: invalid clock\n", np->full_name); - return; + return PTR_ERR(clk); } xtal_freq = clk_get_rate(clk); @@ -44,11 +44,13 @@ static void __init tango_clocksource_init(struct device_node *np) 32, clocksource_mmio_readl_up); if (ret) { pr_err("%s: registration failed\n", np->full_name); - return; + return ret; } sched_clock_register(read_sched_clock, 32, xtal_freq); register_current_timer_delay(&delay_timer); + + return 0; } CLOCKSOURCE_OF_DECLARE(tango, "sigma,tick-counter", tango_clocksource_init); diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c index 7b94ad2ab278..f960891aa04e 100644 --- a/drivers/clocksource/tegra20_timer.c +++ b/drivers/clocksource/tegra20_timer.c @@ -165,7 +165,7 @@ static struct irqaction tegra_timer_irq = { .dev_id = &tegra_clockevent, }; -static void __init tegra20_init_timer(struct device_node *np) +static int __init tegra20_init_timer(struct device_node *np) { struct clk *clk; unsigned long rate; @@ -174,13 +174,13 @@ static void __init tegra20_init_timer(struct device_node *np) timer_reg_base = of_iomap(np, 0); if (!timer_reg_base) { pr_err("Can't map timer registers\n"); - BUG(); + return -ENXIO; } tegra_timer_irq.irq = irq_of_parse_and_map(np, 2); if (tegra_timer_irq.irq <= 0) { pr_err("Failed to map timer IRQ\n"); - BUG(); + return -EINVAL; } clk = of_clk_get(np, 0); @@ -211,10 +211,12 @@ static void __init tegra20_init_timer(struct device_node *np) sched_clock_register(tegra_read_sched_clock, 32, 1000000); - if (clocksource_mmio_init(timer_reg_base + TIMERUS_CNTR_1US, - "timer_us", 1000000, 300, 32, clocksource_mmio_readl_up)) { + ret = clocksource_mmio_init(timer_reg_base + TIMERUS_CNTR_1US, + "timer_us", 1000000, 300, 32, + clocksource_mmio_readl_up); + if (ret) { pr_err("Failed to register clocksource\n"); - BUG(); + return ret; } tegra_delay_timer.read_current_timer = @@ -225,24 +227,26 @@ static void __init tegra20_init_timer(struct device_node *np) ret = setup_irq(tegra_timer_irq.irq, &tegra_timer_irq); if (ret) { pr_err("Failed to register timer IRQ: %d\n", ret); - BUG(); + return ret; } tegra_clockevent.cpumask = cpu_all_mask; tegra_clockevent.irq = tegra_timer_irq.irq; clockevents_config_and_register(&tegra_clockevent, 1000000, 0x1, 0x1fffffff); + + return 0; } CLOCKSOURCE_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra20_init_timer); -static void __init tegra20_init_rtc(struct device_node *np) +static int __init tegra20_init_rtc(struct device_node *np) { struct clk *clk; rtc_base = of_iomap(np, 0); if (!rtc_base) { pr_err("Can't map RTC registers"); - BUG(); + return -ENXIO; } /* @@ -255,6 +259,6 @@ static void __init tegra20_init_rtc(struct device_node *np) else clk_prepare_enable(clk); - register_persistent_clock(NULL, tegra_read_persistent_clock64); + return register_persistent_clock(NULL, tegra_read_persistent_clock64); } CLOCKSOURCE_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc); diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index d93ec3c4f139..20ec066481fe 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -246,7 +246,7 @@ static void armada_370_xp_timer_resume(void) writel(timer0_local_ctrl_reg, local_base + TIMER_CTRL_OFF); } -struct syscore_ops 
armada_370_xp_timer_syscore_ops = { .suspend = armada_370_xp_timer_suspend, .resume = armada_370_xp_timer_resume, }; @@ -260,14 +260,22 @@ static struct delay_timer armada_370_delay_timer = { .read_current_timer = armada_370_delay_timer_read, }; -static void __init armada_370_xp_timer_common_init(struct device_node *np) +static int __init armada_370_xp_timer_common_init(struct device_node *np) { u32 clr = 0, set = 0; int res; timer_base = of_iomap(np, 0); - WARN_ON(!timer_base); + if (!timer_base) { + pr_err("Failed to iomap"); + return -ENXIO; + } + local_base = of_iomap(np, 1); + if (!local_base) { + pr_err("Failed to iomap"); + return -ENXIO; + } if (timer25Mhz) { set = TIMER0_25MHZ; @@ -306,14 +314,19 @@ static void __init armada_370_xp_timer_common_init(struct device_node *np) */ sched_clock_register(armada_370_xp_read_sched_clock, 32, timer_clk); - clocksource_mmio_init(timer_base + TIMER0_VAL_OFF, - "armada_370_xp_clocksource", - timer_clk, 300, 32, clocksource_mmio_readl_down); + res = clocksource_mmio_init(timer_base + TIMER0_VAL_OFF, + "armada_370_xp_clocksource", + timer_clk, 300, 32, clocksource_mmio_readl_down); + if (res) { + pr_err("Failed to initialize clocksource mmio"); + return res; + } register_cpu_notifier(&armada_370_xp_timer_cpu_nb); armada_370_xp_evt = alloc_percpu(struct clock_event_device); - + if (!armada_370_xp_evt) + return -ENOMEM; /* * Setup clockevent timer (interrupt-driven). @@ -323,33 +336,54 @@ static void __init armada_370_xp_timer_common_init(struct device_node *np) "armada_370_xp_per_cpu_tick", armada_370_xp_evt); /* Immediately configure the timer on the boot CPU */ - if (!res) - armada_370_xp_timer_setup(this_cpu_ptr(armada_370_xp_evt)); + if (res) { + pr_err("Failed to request percpu irq"); + return res; + } + + res = armada_370_xp_timer_setup(this_cpu_ptr(armada_370_xp_evt)); + if (res) { + pr_err("Failed to setup timer"); + return res; + } register_syscore_ops(&armada_370_xp_timer_syscore_ops); + + return 0; } -static void __init armada_xp_timer_init(struct device_node *np) +static int __init armada_xp_timer_init(struct device_node *np) { struct clk *clk = of_clk_get_by_name(np, "fixed"); + int ret; + + if (IS_ERR(clk)) { + pr_err("Failed to get clock"); + return PTR_ERR(clk); + } + + ret = clk_prepare_enable(clk); + if (ret) + return ret; - /* The 25Mhz fixed clock is mandatory, and must always be available */ - BUG_ON(IS_ERR(clk)); - clk_prepare_enable(clk); timer_clk = clk_get_rate(clk); - armada_370_xp_timer_common_init(np); + return armada_370_xp_timer_common_init(np); } CLOCKSOURCE_OF_DECLARE(armada_xp, "marvell,armada-xp-timer", armada_xp_timer_init); -static void __init armada_375_timer_init(struct device_node *np) +static int __init armada_375_timer_init(struct device_node *np) { struct clk *clk; + int ret; clk = of_clk_get_by_name(np, "fixed"); if (!IS_ERR(clk)) { - clk_prepare_enable(clk); + ret = clk_prepare_enable(clk); + if (ret) + return ret; timer_clk = clk_get_rate(clk); } else { @@ -360,27 +394,43 @@ static void __init armada_375_timer_init(struct device_node *np) clk = of_clk_get(np, 0); /* Must have at least a clock */ - BUG_ON(IS_ERR(clk)); - clk_prepare_enable(clk); + if (IS_ERR(clk)) { + pr_err("Failed to get clock"); + return PTR_ERR(clk); + } + + ret = clk_prepare_enable(clk); + if (ret) + return ret; + timer_clk = clk_get_rate(clk) / TIMER_DIVIDER; timer25Mhz = false; } - armada_370_xp_timer_common_init(np); + return
armada_370_xp_timer_common_init(np); } CLOCKSOURCE_OF_DECLARE(armada_375, "marvell,armada-375-timer", armada_375_timer_init); -static void __init armada_370_timer_init(struct device_node *np) +static int __init armada_370_timer_init(struct device_node *np) { - struct clk *clk = of_clk_get(np, 0); + struct clk *clk; + int ret; + + clk = of_clk_get(np, 0); + if (IS_ERR(clk)) { + pr_err("Failed to get clock"); + return PTR_ERR(clk); + } + + ret = clk_prepare_enable(clk); + if (ret) + return ret; - BUG_ON(IS_ERR(clk)); - clk_prepare_enable(clk); timer_clk = clk_get_rate(clk) / TIMER_DIVIDER; timer25Mhz = false; - armada_370_xp_timer_common_init(np); + return armada_370_xp_timer_common_init(np); } CLOCKSOURCE_OF_DECLARE(armada_370, "marvell,armada-370-timer", armada_370_timer_init); diff --git a/drivers/clocksource/time-efm32.c b/drivers/clocksource/time-efm32.c index b06e4c2be406..5ac344b383e1 100644 --- a/drivers/clocksource/time-efm32.c +++ b/drivers/clocksource/time-efm32.c @@ -233,10 +233,15 @@ static int __init efm32_clockevent_init(struct device_node *np) DIV_ROUND_CLOSEST(rate, 1024), 0xf, 0xffff); - setup_irq(irq, &efm32_clock_event_irq); + ret = setup_irq(irq, &efm32_clock_event_irq); + if (ret) { + pr_err("Failed setup irq"); + goto err_setup_irq; + } return 0; +err_setup_irq: err_get_irq: iounmap(base); @@ -255,16 +260,16 @@ err_clk_get: * This function asserts that we have exactly one clocksource and one * clock_event_device in the end. */ -static void __init efm32_timer_init(struct device_node *np) +static int __init efm32_timer_init(struct device_node *np) { static int has_clocksource, has_clockevent; - int ret; + int ret = 0; if (!has_clocksource) { ret = efm32_clocksource_init(np); if (!ret) { has_clocksource = 1; - return; + return 0; } } @@ -272,9 +277,11 @@ static void __init efm32_timer_init(struct device_node *np) ret = efm32_clockevent_init(np); if (!ret) { has_clockevent = 1; - return; + return 0; } } + + return ret; } CLOCKSOURCE_OF_DECLARE(efm32compat, "efm32,timer", efm32_timer_init); CLOCKSOURCE_OF_DECLARE(efm32, "energymicro,efm32-timer", efm32_timer_init); diff --git a/drivers/clocksource/time-lpc32xx.c b/drivers/clocksource/time-lpc32xx.c index daae61e8c820..9649cfdb9213 100644 --- a/drivers/clocksource/time-lpc32xx.c +++ b/drivers/clocksource/time-lpc32xx.c @@ -288,16 +288,16 @@ err_clk_enable: * This function asserts that we have exactly one clocksource and one * clock_event_device in the end. 
*/ -static void __init lpc32xx_timer_init(struct device_node *np) +static int __init lpc32xx_timer_init(struct device_node *np) { static int has_clocksource, has_clockevent; - int ret; + int ret = 0; if (!has_clocksource) { ret = lpc32xx_clocksource_init(np); if (!ret) { has_clocksource = 1; - return; + return 0; } } @@ -305,8 +305,10 @@ static void __init lpc32xx_timer_init(struct device_node *np) ret = lpc32xx_clockevent_init(np); if (!ret) { has_clockevent = 1; - return; + return 0; } } + + return ret; } CLOCKSOURCE_OF_DECLARE(lpc32xx_timer, "nxp,lpc3220-timer", lpc32xx_timer_init); diff --git a/drivers/clocksource/time-orion.c b/drivers/clocksource/time-orion.c index 0ece7427b497..a28f496e97cf 100644 --- a/drivers/clocksource/time-orion.c +++ b/drivers/clocksource/time-orion.c @@ -104,25 +104,36 @@ static struct irqaction orion_clkevt_irq = { .handler = orion_clkevt_irq_handler, }; -static void __init orion_timer_init(struct device_node *np) +static int __init orion_timer_init(struct device_node *np) { struct clk *clk; - int irq; + int irq, ret; /* timer registers are shared with watchdog timer */ timer_base = of_iomap(np, 0); - if (!timer_base) - panic("%s: unable to map resource\n", np->name); + if (!timer_base) { + pr_err("%s: unable to map resource\n", np->name); + return -ENXIO; + } clk = of_clk_get(np, 0); - if (IS_ERR(clk)) - panic("%s: unable to get clk\n", np->name); - clk_prepare_enable(clk); + if (IS_ERR(clk)) { + pr_err("%s: unable to get clk\n", np->name); + return PTR_ERR(clk); + } + + ret = clk_prepare_enable(clk); + if (ret) { + pr_err("Failed to prepare clock"); + return ret; + } /* we are only interested in timer1 irq */ irq = irq_of_parse_and_map(np, 1); - if (irq <= 0) - panic("%s: unable to parse timer1 irq\n", np->name); + if (irq <= 0) { + pr_err("%s: unable to parse timer1 irq\n", np->name); + return -EINVAL; + } /* setup timer0 as free-running clocksource */ writel(~0, timer_base + TIMER0_VAL); @@ -130,19 +141,30 @@ static void __init orion_timer_init(struct device_node *np) atomic_io_modify(timer_base + TIMER_CTRL, TIMER0_RELOAD_EN | TIMER0_EN, TIMER0_RELOAD_EN | TIMER0_EN); - clocksource_mmio_init(timer_base + TIMER0_VAL, "orion_clocksource", - clk_get_rate(clk), 300, 32, - clocksource_mmio_readl_down); + + ret = clocksource_mmio_init(timer_base + TIMER0_VAL, "orion_clocksource", + clk_get_rate(clk), 300, 32, + clocksource_mmio_readl_down); + if (ret) { + pr_err("Failed to initialize mmio timer"); + return ret; + } + sched_clock_register(orion_read_sched_clock, 32, clk_get_rate(clk)); /* setup timer1 as clockevent timer */ - if (setup_irq(irq, &orion_clkevt_irq)) - panic("%s: unable to setup irq\n", np->name); + ret = setup_irq(irq, &orion_clkevt_irq); + if (ret) { + pr_err("%s: unable to setup irq\n", np->name); + return ret; + } ticks_per_jiffy = (clk_get_rate(clk) + HZ/2) / HZ; orion_clkevt.cpumask = cpumask_of(0); orion_clkevt.irq = irq; clockevents_config_and_register(&orion_clkevt, clk_get_rate(clk), ORION_ONESHOT_MIN, ORION_ONESHOT_MAX); + + return 0; } CLOCKSOURCE_OF_DECLARE(orion_timer, "marvell,orion-timer", orion_timer_init); diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/time-pistachio.c index 376e59bc5fa0..a7d9a08e4b0e 100644 --- a/drivers/clocksource/time-pistachio.c +++ b/drivers/clocksource/time-pistachio.c @@ -148,7 +148,7 @@ static struct pistachio_clocksource pcs_gpt = { }, }; -static void __init pistachio_clksrc_of_init(struct device_node *node) +static int __init pistachio_clksrc_of_init(struct device_node *node) 
{ struct clk *sys_clk, *fast_clk; struct regmap *periph_regs; @@ -158,45 +158,45 @@ static void __init pistachio_clksrc_of_init(struct device_node *node) pcs_gpt.base = of_iomap(node, 0); if (!pcs_gpt.base) { pr_err("cannot iomap\n"); - return; + return -ENXIO; } periph_regs = syscon_regmap_lookup_by_phandle(node, "img,cr-periph"); if (IS_ERR(periph_regs)) { pr_err("cannot get peripheral regmap (%ld)\n", PTR_ERR(periph_regs)); - return; + return PTR_ERR(periph_regs); } /* Switch to using the fast counter clock */ ret = regmap_update_bits(periph_regs, PERIP_TIMER_CONTROL, 0xf, 0x0); if (ret) - return; + return ret; sys_clk = of_clk_get_by_name(node, "sys"); if (IS_ERR(sys_clk)) { pr_err("clock get failed (%ld)\n", PTR_ERR(sys_clk)); - return; + return PTR_ERR(sys_clk); } fast_clk = of_clk_get_by_name(node, "fast"); if (IS_ERR(fast_clk)) { pr_err("clock get failed (%lu)\n", PTR_ERR(fast_clk)); - return; + return PTR_ERR(fast_clk); } ret = clk_prepare_enable(sys_clk); if (ret < 0) { pr_err("failed to enable clock (%d)\n", ret); - return; + return ret; } ret = clk_prepare_enable(fast_clk); if (ret < 0) { pr_err("failed to enable clock (%d)\n", ret); clk_disable_unprepare(sys_clk); - return; + return ret; } rate = clk_get_rate(fast_clk); @@ -212,7 +212,7 @@ static void __init pistachio_clksrc_of_init(struct device_node *node) raw_spin_lock_init(&pcs_gpt.lock); sched_clock_register(pistachio_read_sched_clock, 32, rate); - clocksource_register_hz(&pcs_gpt.cs, rate); + return clocksource_register_hz(&pcs_gpt.cs, rate); } CLOCKSOURCE_OF_DECLARE(pistachio_gptimer, "img,pistachio-gptimer", pistachio_clksrc_of_init); diff --git a/drivers/clocksource/timer-atlas7.c b/drivers/clocksource/timer-atlas7.c index 27fa13680be1..90f8fbc154a4 100644 --- a/drivers/clocksource/timer-atlas7.c +++ b/drivers/clocksource/timer-atlas7.c @@ -238,7 +238,7 @@ static struct notifier_block sirfsoc_cpu_nb = { .notifier_call = sirfsoc_cpu_notify, }; -static void __init sirfsoc_clockevent_init(void) +static int __init sirfsoc_clockevent_init(void) { sirfsoc_clockevent = alloc_percpu(struct clock_event_device); BUG_ON(!sirfsoc_clockevent); @@ -246,11 +246,11 @@ static void __init sirfsoc_clockevent_init(void) BUG_ON(register_cpu_notifier(&sirfsoc_cpu_nb)); /* Immediately configure the timer on the boot CPU */ - sirfsoc_local_timer_setup(this_cpu_ptr(sirfsoc_clockevent)); + return sirfsoc_local_timer_setup(this_cpu_ptr(sirfsoc_clockevent)); } /* initialize the kernel jiffy timer source */ -static void __init sirfsoc_atlas7_timer_init(struct device_node *np) +static int __init sirfsoc_atlas7_timer_init(struct device_node *np) { struct clk *clk; @@ -279,23 +279,29 @@ static void __init sirfsoc_atlas7_timer_init(struct device_node *np) BUG_ON(clocksource_register_hz(&sirfsoc_clocksource, atlas7_timer_rate)); - sirfsoc_clockevent_init(); + return sirfsoc_clockevent_init(); } -static void __init sirfsoc_of_timer_init(struct device_node *np) +static int __init sirfsoc_of_timer_init(struct device_node *np) { sirfsoc_timer_base = of_iomap(np, 0); - if (!sirfsoc_timer_base) - panic("unable to map timer cpu registers\n"); + if (!sirfsoc_timer_base) { + pr_err("unable to map timer cpu registers\n"); + return -ENXIO; + } sirfsoc_timer_irq.irq = irq_of_parse_and_map(np, 0); - if (!sirfsoc_timer_irq.irq) - panic("No irq passed for timer0 via DT\n"); + if (!sirfsoc_timer_irq.irq) { + pr_err("No irq passed for timer0 via DT\n"); + return -EINVAL; + } sirfsoc_timer1_irq.irq = irq_of_parse_and_map(np, 1); - if (!sirfsoc_timer1_irq.irq) - 
panic("No irq passed for timer1 via DT\n"); + if (!sirfsoc_timer1_irq.irq) { + pr_err("No irq passed for timer1 via DT\n"); + return -EINVAL; + } - sirfsoc_atlas7_timer_init(np); + return sirfsoc_atlas7_timer_init(np); } CLOCKSOURCE_OF_DECLARE(sirfsoc_atlas7_timer, "sirf,atlas7-tick", sirfsoc_of_timer_init); diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index d911c5dca8f1..1ffac0cb0cb7 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -177,7 +177,7 @@ static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id) /* * Set up both clocksource and clockevent support. */ -static void __init at91sam926x_pit_common_init(struct pit_data *data) +static int __init at91sam926x_pit_common_init(struct pit_data *data) { unsigned long pit_rate; unsigned bits; @@ -204,14 +204,21 @@ static void __init at91sam926x_pit_common_init(struct pit_data *data) data->clksrc.rating = 175; data->clksrc.read = read_pit_clk; data->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS; - clocksource_register_hz(&data->clksrc, pit_rate); + + ret = clocksource_register_hz(&data->clksrc, pit_rate); + if (ret) { + pr_err("Failed to register clocksource"); + return ret; + } /* Set up irq handler */ ret = request_irq(data->irq, at91sam926x_pit_interrupt, IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, "at91_tick", data); - if (ret) - panic(pr_fmt("Unable to setup IRQ\n")); + if (ret) { + pr_err("Unable to setup IRQ\n"); + return ret; + } /* Set up and register clockevents */ data->clkevt.name = "pit"; @@ -226,34 +233,42 @@ static void __init at91sam926x_pit_common_init(struct pit_data *data) data->clkevt.resume = at91sam926x_pit_resume; data->clkevt.suspend = at91sam926x_pit_suspend; clockevents_register_device(&data->clkevt); + + return 0; } -static void __init at91sam926x_pit_dt_init(struct device_node *node) +static int __init at91sam926x_pit_dt_init(struct device_node *node) { struct pit_data *data; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) - panic(pr_fmt("Unable to allocate memory\n")); + return -ENOMEM; data->base = of_iomap(node, 0); - if (!data->base) - panic(pr_fmt("Could not map PIT address\n")); + if (!data->base) { + pr_err("Could not map PIT address\n"); + return -ENXIO; + } data->mck = of_clk_get(node, 0); if (IS_ERR(data->mck)) /* Fallback on clkdev for !CCF-based boards */ data->mck = clk_get(NULL, "mck"); - if (IS_ERR(data->mck)) - panic(pr_fmt("Unable to get mck clk\n")); + if (IS_ERR(data->mck)) { + pr_err("Unable to get mck clk\n"); + return PTR_ERR(data->mck); + } /* Get the interrupts property */ data->irq = irq_of_parse_and_map(node, 0); - if (!data->irq) - panic(pr_fmt("Unable to get IRQ from DT\n")); + if (!data->irq) { + pr_err("Unable to get IRQ from DT\n"); + return -EINVAL; + } - at91sam926x_pit_common_init(data); + return at91sam926x_pit_common_init(data); } CLOCKSOURCE_OF_DECLARE(at91sam926x_pit, "atmel,at91sam9260-pit", at91sam926x_pit_dt_init); diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c index 29d21d68df5a..e90ab5b63a90 100644 --- a/drivers/clocksource/timer-atmel-st.c +++ b/drivers/clocksource/timer-atmel-st.c @@ -194,15 +194,17 @@ static struct clock_event_device clkevt = { /* * ST (system timer) module supports both clockevents and clocksource. 
*/ -static void __init atmel_st_timer_init(struct device_node *node) +static int __init atmel_st_timer_init(struct device_node *node) { struct clk *sclk; unsigned int sclk_rate, val; int irq, ret; regmap_st = syscon_node_to_regmap(node); - if (IS_ERR(regmap_st)) - panic(pr_fmt("Unable to get regmap\n")); + if (IS_ERR(regmap_st)) { + pr_err("Unable to get regmap\n"); + return PTR_ERR(regmap_st); + } /* Disable all timer interrupts, and clear any pending ones */ regmap_write(regmap_st, AT91_ST_IDR, @@ -211,27 +213,37 @@ static void __init atmel_st_timer_init(struct device_node *node) /* Get the interrupts property */ irq = irq_of_parse_and_map(node, 0); - if (!irq) - panic(pr_fmt("Unable to get IRQ from DT\n")); + if (!irq) { + pr_err("Unable to get IRQ from DT\n"); + return -EINVAL; + } /* Make IRQs happen for the system timer */ ret = request_irq(irq, at91rm9200_timer_interrupt, IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, "at91_tick", regmap_st); - if (ret) - panic(pr_fmt("Unable to setup IRQ\n")); + if (ret) { + pr_err("Unable to setup IRQ\n"); + return ret; + } sclk = of_clk_get(node, 0); - if (IS_ERR(sclk)) - panic(pr_fmt("Unable to get slow clock\n")); + if (IS_ERR(sclk)) { + pr_err("Unable to get slow clock\n"); + return PTR_ERR(sclk); + } - clk_prepare_enable(sclk); - if (ret) - panic(pr_fmt("Could not enable slow clock\n")); + ret = clk_prepare_enable(sclk); + if (ret) { + pr_err("Could not enable slow clock\n"); + return ret; + } sclk_rate = clk_get_rate(sclk); - if (!sclk_rate) - panic(pr_fmt("Invalid slow clock rate\n")); + if (!sclk_rate) { + pr_err("Invalid slow clock rate\n"); + return -EINVAL; + } timer_latch = (sclk_rate + HZ / 2) / HZ; /* The 32KiHz "Slow Clock" (tick every 30517.58 nanoseconds) is used @@ -246,7 +258,7 @@ static void __init atmel_st_timer_init(struct device_node *node) 2, AT91_ST_ALMV); /* register clocksource */ - clocksource_register_hz(&clk32k, sclk_rate); + return clocksource_register_hz(&clk32k, sclk_rate); } CLOCKSOURCE_OF_DECLARE(atmel_st_timer, "atmel,at91rm9200-st", atmel_st_timer_init); diff --git a/drivers/clocksource/timer-digicolor.c b/drivers/clocksource/timer-digicolor.c index a536eeb634d8..10318cc99c0e 100644 --- a/drivers/clocksource/timer-digicolor.c +++ b/drivers/clocksource/timer-digicolor.c @@ -63,7 +63,7 @@ struct digicolor_timer { int timer_id; /* one of TIMER_* */ }; -struct digicolor_timer *dc_timer(struct clock_event_device *ce) +static struct digicolor_timer *dc_timer(struct clock_event_device *ce) { return container_of(ce, struct digicolor_timer, ce); } @@ -148,7 +148,7 @@ static u64 notrace digicolor_timer_sched_read(void) return ~readl(dc_timer_dev.base + COUNT(TIMER_B)); } -static void __init digicolor_timer_init(struct device_node *node) +static int __init digicolor_timer_init(struct device_node *node) { unsigned long rate; struct clk *clk; @@ -161,19 +161,19 @@ static void __init digicolor_timer_init(struct device_node *node) dc_timer_dev.base = of_iomap(node, 0); if (!dc_timer_dev.base) { pr_err("Can't map registers"); - return; + return -ENXIO; } irq = irq_of_parse_and_map(node, dc_timer_dev.timer_id); if (irq <= 0) { pr_err("Can't parse IRQ"); - return; + return -EINVAL; } clk = of_clk_get(node, 0); if (IS_ERR(clk)) { pr_err("Can't get timer clock"); - return; + return PTR_ERR(clk); } clk_prepare_enable(clk); rate = clk_get_rate(clk); @@ -190,13 +190,17 @@ static void __init digicolor_timer_init(struct device_node *node) ret = request_irq(irq, digicolor_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL, "digicolor_timerC", 
&dc_timer_dev.ce); - if (ret) + if (ret) { pr_warn("request of timer irq %d failed (%d)\n", irq, ret); + return ret; + } dc_timer_dev.ce.cpumask = cpu_possible_mask; dc_timer_dev.ce.irq = irq; clockevents_config_and_register(&dc_timer_dev.ce, rate, 0, 0xffffffff); + + return 0; } CLOCKSOURCE_OF_DECLARE(conexant_digicolor, "cnxt,cx92755-timer", digicolor_timer_init); diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c index 99ec96769dda..f595460bfc58 100644 --- a/drivers/clocksource/timer-imx-gpt.c +++ b/drivers/clocksource/timer-imx-gpt.c @@ -407,8 +407,10 @@ static const struct imx_gpt_data imx6dl_gpt_data = { .set_next_event = v2_set_next_event, }; -static void __init _mxc_timer_init(struct imx_timer *imxtm) +static int __init _mxc_timer_init(struct imx_timer *imxtm) { + int ret; + switch (imxtm->type) { case GPT_TYPE_IMX1: imxtm->gpt = &imx1_gpt_data; @@ -423,12 +425,12 @@ static void __init _mxc_timer_init(struct imx_timer *imxtm) imxtm->gpt = &imx6dl_gpt_data; break; default: - BUG(); + return -EINVAL; } if (IS_ERR(imxtm->clk_per)) { pr_err("i.MX timer: unable to get clk\n"); - return; + return PTR_ERR(imxtm->clk_per); } if (!IS_ERR(imxtm->clk_ipg)) @@ -446,8 +448,11 @@ static void __init _mxc_timer_init(struct imx_timer *imxtm) imxtm->gpt->gpt_setup_tctl(imxtm); /* init and register the timer to the framework */ - mxc_clocksource_init(imxtm); - mxc_clockevent_init(imxtm); + ret = mxc_clocksource_init(imxtm); + if (ret) + return ret; + + return mxc_clockevent_init(imxtm); } void __init mxc_timer_init(unsigned long pbase, int irq, enum imx_gpt_type type) @@ -469,21 +474,27 @@ void __init mxc_timer_init(unsigned long pbase, int irq, enum imx_gpt_type type) _mxc_timer_init(imxtm); } -static void __init mxc_timer_init_dt(struct device_node *np, enum imx_gpt_type type) +static int __init mxc_timer_init_dt(struct device_node *np, enum imx_gpt_type type) { struct imx_timer *imxtm; static int initialized; + int ret; /* Support one instance only */ if (initialized) - return; + return 0; imxtm = kzalloc(sizeof(*imxtm), GFP_KERNEL); - BUG_ON(!imxtm); + if (!imxtm) + return -ENOMEM; imxtm->base = of_iomap(np, 0); - WARN_ON(!imxtm->base); + if (!imxtm->base) + return -ENXIO; + imxtm->irq = irq_of_parse_and_map(np, 0); + if (imxtm->irq <= 0) + return -EINVAL; imxtm->clk_ipg = of_clk_get_by_name(np, "ipg"); @@ -494,22 +505,26 @@ static void __init mxc_timer_init_dt(struct device_node *np, enum imx_gpt_type imxtm->type = type; - _mxc_timer_init(imxtm); + ret = _mxc_timer_init(imxtm); + if (ret) + return ret; initialized = 1; + + return 0; } -static void __init imx1_timer_init_dt(struct device_node *np) +static int __init imx1_timer_init_dt(struct device_node *np) { - mxc_timer_init_dt(np, GPT_TYPE_IMX1); + return mxc_timer_init_dt(np, GPT_TYPE_IMX1); } -static void __init imx21_timer_init_dt(struct device_node *np) +static int __init imx21_timer_init_dt(struct device_node *np) { - mxc_timer_init_dt(np, GPT_TYPE_IMX21); + return mxc_timer_init_dt(np, GPT_TYPE_IMX21); } -static void __init imx31_timer_init_dt(struct device_node *np) +static int __init imx31_timer_init_dt(struct device_node *np) { enum imx_gpt_type type = GPT_TYPE_IMX31; @@ -522,12 +537,12 @@ static void __init imx31_timer_init_dt(struct device_node *np) if (of_machine_is_compatible("fsl,imx6dl")) type = GPT_TYPE_IMX6DL; - mxc_timer_init_dt(np, type); + return mxc_timer_init_dt(np, type); } -static void __init imx6dl_timer_init_dt(struct device_node *np) +static int __init imx6dl_timer_init_dt(struct 
device_node *np) { - mxc_timer_init_dt(np, GPT_TYPE_IMX6DL); + return mxc_timer_init_dt(np, GPT_TYPE_IMX6DL); } CLOCKSOURCE_OF_DECLARE(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt); diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c index 3f59ac2180dc..df6e672afc04 100644 --- a/drivers/clocksource/timer-integrator-ap.c +++ b/drivers/clocksource/timer-integrator-ap.c @@ -36,11 +36,12 @@ static u64 notrace integrator_read_sched_clock(void) return -readl(sched_clk_base + TIMER_VALUE); } -static void integrator_clocksource_init(unsigned long inrate, - void __iomem *base) +static int integrator_clocksource_init(unsigned long inrate, + void __iomem *base) { u32 ctrl = TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC; unsigned long rate = inrate; + int ret; if (rate >= 1500000) { rate /= 16; @@ -50,11 +51,15 @@ static void integrator_clocksource_init(unsigned long inrate, writel(0xffff, base + TIMER_LOAD); writel(ctrl, base + TIMER_CTRL); - clocksource_mmio_init(base + TIMER_VALUE, "timer2", - rate, 200, 16, clocksource_mmio_readl_down); + ret = clocksource_mmio_init(base + TIMER_VALUE, "timer2", + rate, 200, 16, clocksource_mmio_readl_down); + if (ret) + return ret; sched_clk_base = base; sched_clock_register(integrator_read_sched_clock, 16, rate); + + return 0; } static unsigned long timer_reload; @@ -138,11 +143,12 @@ static struct irqaction integrator_timer_irq = { .dev_id = &integrator_clockevent, }; -static void integrator_clockevent_init(unsigned long inrate, - void __iomem *base, int irq) +static int integrator_clockevent_init(unsigned long inrate, + void __iomem *base, int irq) { unsigned long rate = inrate; unsigned int ctrl = 0; + int ret; clkevt_base = base; /* Calculate and program a divisor */ @@ -156,14 +162,18 @@ static void integrator_clockevent_init(unsigned long inrate, timer_reload = rate / HZ; writel(ctrl, clkevt_base + TIMER_CTRL); - setup_irq(irq, &integrator_timer_irq); + ret = setup_irq(irq, &integrator_timer_irq); + if (ret) + return ret; + clockevents_config_and_register(&integrator_clockevent, rate, 1, 0xffffU); + return 0; } -static void __init integrator_ap_timer_init_of(struct device_node *node) +static int __init integrator_ap_timer_init_of(struct device_node *node) { const char *path; void __iomem *base; @@ -176,12 +186,12 @@ static void __init integrator_ap_timer_init_of(struct device_node *node) base = of_io_request_and_map(node, 0, "integrator-timer"); if (IS_ERR(base)) - return; + return PTR_ERR(base); clk = of_clk_get(node, 0); if (IS_ERR(clk)) { pr_err("No clock for %s\n", node->name); - return; + return PTR_ERR(clk); } clk_prepare_enable(clk); rate = clk_get_rate(clk); @@ -189,30 +199,37 @@ static void __init integrator_ap_timer_init_of(struct device_node *node) err = of_property_read_string(of_aliases, "arm,timer-primary", &path); - if (WARN_ON(err)) - return; + if (err) { + pr_warn("Failed to read property"); + return err; + } + pri_node = of_find_node_by_path(path); + err = of_property_read_string(of_aliases, "arm,timer-secondary", &path); - if (WARN_ON(err)) - return; + if (err) { + pr_warn("Failed to read property"); + return err; + } + + sec_node = of_find_node_by_path(path); - if (node == pri_node) { + if (node == pri_node) /* The primary timer lacks IRQ, use as clocksource */ - integrator_clocksource_init(rate, base); - return; - } + return integrator_clocksource_init(rate, base); if (node == sec_node) { /* The secondary timer will drive the clock event */ irq = irq_of_parse_and_map(node, 0); - 
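integrator_clocksource_init() above pairs the two registration paths for a single down-counting register: clocksource_mmio_init() wraps the register as a clocksource (hence the _down reader), and sched_clock_register() gets a negated read so the scheduler clock increases monotonically. The essentials, stripped of the Integrator specifics; the 0x04 offset, 16-bit width and rating 200 are assumed for illustration:

    #include <linux/clocksource.h>
    #include <linux/sched_clock.h>
    #include <linux/io.h>

    /* Sketch: one 16-bit down-counter serving as clocksource and sched_clock. */
    static void __iomem *example_base;

    static u64 notrace example_sched_read(void)
    {
            /* the counter runs down; negate it so the result counts up */
            return -readl(example_base + 0x04);
    }

    static int example_clocksource_init(unsigned long rate, void __iomem *base)
    {
            int ret;

            ret = clocksource_mmio_init(base + 0x04, "example-timer", rate,
                                        200, 16, clocksource_mmio_readl_down);
            if (ret)
                    return ret;

            example_base = base;
            sched_clock_register(example_sched_read, 16, rate);
            return 0;
    }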
integrator_clockevent_init(rate, base, irq); - return; + return integrator_clockevent_init(rate, base, irq); } pr_info("Timer @%p unused\n", base); clk_disable_unprepare(clk); + + return 0; } CLOCKSOURCE_OF_DECLARE(integrator_ap_timer, "arm,integrator-timer", diff --git a/drivers/clocksource/timer-keystone.c b/drivers/clocksource/timer-keystone.c index 1cea08cf603e..ab68a47ab3b4 100644 --- a/drivers/clocksource/timer-keystone.c +++ b/drivers/clocksource/timer-keystone.c @@ -144,7 +144,7 @@ static int keystone_set_periodic(struct clock_event_device *evt) return 0; } -static void __init keystone_timer_init(struct device_node *np) +static int __init keystone_timer_init(struct device_node *np) { struct clock_event_device *event_dev = &timer.event_dev; unsigned long rate; @@ -154,20 +154,20 @@ static void __init keystone_timer_init(struct device_node *np) irq = irq_of_parse_and_map(np, 0); if (!irq) { pr_err("%s: failed to map interrupts\n", __func__); - return; + return -EINVAL; } timer.base = of_iomap(np, 0); if (!timer.base) { pr_err("%s: failed to map registers\n", __func__); - return; + return -ENXIO; } clk = of_clk_get(np, 0); if (IS_ERR(clk)) { pr_err("%s: failed to get clock\n", __func__); iounmap(timer.base); - return; + return PTR_ERR(clk); } error = clk_prepare_enable(clk); @@ -219,11 +219,12 @@ static void __init keystone_timer_init(struct device_node *np) clockevents_config_and_register(event_dev, rate, 1, ULONG_MAX); pr_info("keystone timer clock @%lu Hz\n", rate); - return; + return 0; err: clk_put(clk); iounmap(timer.base); + return error; } CLOCKSOURCE_OF_DECLARE(keystone_timer, "ti,keystone-timer", - keystone_timer_init); + keystone_timer_init); diff --git a/drivers/clocksource/timer-nps.c b/drivers/clocksource/timer-nps.c index d46108920b2c..70c149af8ee0 100644 --- a/drivers/clocksource/timer-nps.c +++ b/drivers/clocksource/timer-nps.c @@ -55,8 +55,8 @@ static cycle_t nps_clksrc_read(struct clocksource *clksrc) return (cycle_t)ioread32be(nps_msu_reg_low_addr[cluster]); } -static void __init nps_setup_clocksource(struct device_node *node, - struct clk *clk) +static int __init nps_setup_clocksource(struct device_node *node, + struct clk *clk) { int ret, cluster; @@ -68,7 +68,7 @@ static void __init nps_setup_clocksource(struct device_node *node, ret = clk_prepare_enable(clk); if (ret) { pr_err("Couldn't enable parent clock\n"); - return; + return ret; } nps_timer_rate = clk_get_rate(clk); @@ -79,19 +79,21 @@ static void __init nps_setup_clocksource(struct device_node *node, pr_err("Couldn't register clock source.\n"); clk_disable_unprepare(clk); } + + return ret; } -static void __init nps_timer_init(struct device_node *node) +static int __init nps_timer_init(struct device_node *node) { struct clk *clk; clk = of_clk_get(node, 0); if (IS_ERR(clk)) { pr_err("Can't get timer clock.\n"); - return; + return PTR_ERR(clk); } - nps_setup_clocksource(node, clk); + return nps_setup_clocksource(node, clk); } CLOCKSOURCE_OF_DECLARE(ezchip_nps400_clksrc, "ezchip,nps400-timer", diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c new file mode 100644 index 000000000000..bd887e2a8cf8 --- /dev/null +++ b/drivers/clocksource/timer-oxnas-rps.c @@ -0,0 +1,297 @@ +/* + * drivers/clocksource/timer-oxnas-rps.c + * + * Copyright (C) 2009 Oxford Semiconductor Ltd + * Copyright (C) 2013 Ma Haijun + * Copyright (C) 2016 Neil Armstrong + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General 
Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* TIMER1 used as tick + * TIMER2 used as clocksource + */ + +/* Registers definitions */ + +#define TIMER_LOAD_REG 0x0 +#define TIMER_CURR_REG 0x4 +#define TIMER_CTRL_REG 0x8 +#define TIMER_CLRINT_REG 0xC + +#define TIMER_BITS 24 + +#define TIMER_MAX_VAL (BIT(TIMER_BITS) - 1) + +#define TIMER_PERIODIC BIT(6) +#define TIMER_ENABLE BIT(7) + +#define TIMER_DIV1 (0) +#define TIMER_DIV16 (1 << 2) +#define TIMER_DIV256 (2 << 2) + +#define TIMER1_REG_OFFSET 0 +#define TIMER2_REG_OFFSET 0x20 + +/* Clockevent & Clocksource data */ + +struct oxnas_rps_timer { + struct clock_event_device clkevent; + void __iomem *clksrc_base; + void __iomem *clkevt_base; + unsigned long timer_period; + unsigned int timer_prescaler; + struct clk *clk; + int irq; +}; + +static irqreturn_t oxnas_rps_timer_irq(int irq, void *dev_id) +{ + struct oxnas_rps_timer *rps = dev_id; + + writel_relaxed(0, rps->clkevt_base + TIMER_CLRINT_REG); + + rps->clkevent.event_handler(&rps->clkevent); + + return IRQ_HANDLED; +} + +static void oxnas_rps_timer_config(struct oxnas_rps_timer *rps, + unsigned long period, + unsigned int periodic) +{ + uint32_t cfg = rps->timer_prescaler; + + if (period) + cfg |= TIMER_ENABLE; + + if (periodic) + cfg |= TIMER_PERIODIC; + + writel_relaxed(period, rps->clkevt_base + TIMER_LOAD_REG); + writel_relaxed(cfg, rps->clkevt_base + TIMER_CTRL_REG); +} + +static int oxnas_rps_timer_shutdown(struct clock_event_device *evt) +{ + struct oxnas_rps_timer *rps = + container_of(evt, struct oxnas_rps_timer, clkevent); + + oxnas_rps_timer_config(rps, 0, 0); + + return 0; +} + +static int oxnas_rps_timer_set_periodic(struct clock_event_device *evt) +{ + struct oxnas_rps_timer *rps = + container_of(evt, struct oxnas_rps_timer, clkevent); + + oxnas_rps_timer_config(rps, rps->timer_period, 1); + + return 0; +} + +static int oxnas_rps_timer_set_oneshot(struct clock_event_device *evt) +{ + struct oxnas_rps_timer *rps = + container_of(evt, struct oxnas_rps_timer, clkevent); + + oxnas_rps_timer_config(rps, rps->timer_period, 0); + + return 0; +} + +static int oxnas_rps_timer_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + struct oxnas_rps_timer *rps = + container_of(evt, struct oxnas_rps_timer, clkevent); + + oxnas_rps_timer_config(rps, delta, 0); + + return 0; +} + +static int __init oxnas_rps_clockevent_init(struct oxnas_rps_timer *rps) +{ + ulong clk_rate = clk_get_rate(rps->clk); + ulong timer_rate; + + /* Start with prescaler 1 */ + rps->timer_prescaler = TIMER_DIV1; + rps->timer_period = DIV_ROUND_UP(clk_rate, HZ); + timer_rate = clk_rate; + + if (rps->timer_period > TIMER_MAX_VAL) { + rps->timer_prescaler = TIMER_DIV16; + timer_rate = clk_rate / 16; + rps->timer_period = DIV_ROUND_UP(timer_rate, HZ); + } + if (rps->timer_period > TIMER_MAX_VAL) { + rps->timer_prescaler = TIMER_DIV256; + timer_rate = clk_rate / 256; + rps->timer_period = DIV_ROUND_UP(timer_rate, HZ); + } + + rps->clkevent.name = "oxnas-rps"; 
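The prescaler selection above is a widest-fit search: start at divide-by-1 and fall back to /16, then /256, whenever one jiffy's worth of ticks would overflow the 24-bit counter. Worked standalone; the 2 GHz input rate and HZ=100 are assumed values chosen to force the /16 case, not taken from the SoC:

    #include <stdio.h>

    #define TIMER_BITS      24
    #define TIMER_MAX_VAL   ((1UL << TIMER_BITS) - 1)
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned long clk_rate = 2000000000UL;  /* assumed 2 GHz input */
            unsigned int hz = 100;                  /* assumed CONFIG_HZ */
            unsigned long rate = clk_rate;
            unsigned long period = DIV_ROUND_UP(rate, hz);
            unsigned int presc = 1;

            if (period > TIMER_MAX_VAL) {           /* try /16 */
                    presc = 16;
                    rate = clk_rate / 16;
                    period = DIV_ROUND_UP(rate, hz);
            }
            if (period > TIMER_MAX_VAL) {           /* try /256 */
                    presc = 256;
                    rate = clk_rate / 256;
                    period = DIV_ROUND_UP(rate, hz);
            }

            /* prints: prescaler /16, period 1250000 ticks */
            printf("prescaler /%u, period %lu ticks\n", presc, period);
            return 0;
    }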
+ rps->clkevent.features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_DYNIRQ; + rps->clkevent.tick_resume = oxnas_rps_timer_shutdown; + rps->clkevent.set_state_shutdown = oxnas_rps_timer_shutdown; + rps->clkevent.set_state_periodic = oxnas_rps_timer_set_periodic; + rps->clkevent.set_state_oneshot = oxnas_rps_timer_set_oneshot; + rps->clkevent.set_next_event = oxnas_rps_timer_next_event; + rps->clkevent.rating = 200; + rps->clkevent.cpumask = cpu_possible_mask; + rps->clkevent.irq = rps->irq; + clockevents_config_and_register(&rps->clkevent, + timer_rate, + 1, + TIMER_MAX_VAL); + + pr_info("Registered clock event rate %luHz prescaler %x period %lu\n", + clk_rate, + rps->timer_prescaler, + rps->timer_period); + + return 0; +} + +/* Clocksource */ + +static void __iomem *timer_sched_base; + +static u64 notrace oxnas_rps_read_sched_clock(void) +{ + return ~readl_relaxed(timer_sched_base); +} + +static int __init oxnas_rps_clocksource_init(struct oxnas_rps_timer *rps) +{ + ulong clk_rate = clk_get_rate(rps->clk); + int ret; + + /* use prescale 16 */ + clk_rate = clk_rate / 16; + + writel_relaxed(TIMER_MAX_VAL, rps->clksrc_base + TIMER_LOAD_REG); + writel_relaxed(TIMER_PERIODIC | TIMER_ENABLE | TIMER_DIV16, + rps->clksrc_base + TIMER_CTRL_REG); + + timer_sched_base = rps->clksrc_base + TIMER_CURR_REG; + sched_clock_register(oxnas_rps_read_sched_clock, + TIMER_BITS, clk_rate); + ret = clocksource_mmio_init(timer_sched_base, + "oxnas_rps_clocksource_timer", + clk_rate, 250, TIMER_BITS, + clocksource_mmio_readl_down); + if (WARN_ON(ret)) { + pr_err("can't register clocksource\n"); + return ret; + } + + pr_info("Registered clocksource rate %luHz\n", clk_rate); + + return 0; +} + +static int __init oxnas_rps_timer_init(struct device_node *np) +{ + struct oxnas_rps_timer *rps; + void __iomem *base; + int ret; + + rps = kzalloc(sizeof(*rps), GFP_KERNEL); + if (!rps) + return -ENOMEM; + + rps->clk = of_clk_get(np, 0); + if (IS_ERR(rps->clk)) { + ret = PTR_ERR(rps->clk); + goto err_alloc; + } + + ret = clk_prepare_enable(rps->clk); + if (ret) + goto err_clk; + + base = of_iomap(np, 0); + if (!base) { + ret = -ENXIO; + goto err_clk_prepare; + } + + rps->irq = irq_of_parse_and_map(np, 0); + if (rps->irq < 0) { + ret = -EINVAL; + goto err_iomap; + } + + rps->clkevt_base = base + TIMER1_REG_OFFSET; + rps->clksrc_base = base + TIMER2_REG_OFFSET; + + /* Disable timers */ + writel_relaxed(0, rps->clkevt_base + TIMER_CTRL_REG); + writel_relaxed(0, rps->clksrc_base + TIMER_CTRL_REG); + writel_relaxed(0, rps->clkevt_base + TIMER_LOAD_REG); + writel_relaxed(0, rps->clksrc_base + TIMER_LOAD_REG); + writel_relaxed(0, rps->clkevt_base + TIMER_CLRINT_REG); + writel_relaxed(0, rps->clksrc_base + TIMER_CLRINT_REG); + + ret = request_irq(rps->irq, oxnas_rps_timer_irq, + IRQF_TIMER | IRQF_IRQPOLL, + "rps-timer", rps); + if (ret) + goto err_iomap; + + ret = oxnas_rps_clocksource_init(rps); + if (ret) + goto err_irqreq; + + ret = oxnas_rps_clockevent_init(rps); + if (ret) + goto err_irqreq; + + return 0; + +err_irqreq: + free_irq(rps->irq, rps); +err_iomap: + iounmap(base); +err_clk_prepare: + clk_disable_unprepare(rps->clk); +err_clk: + clk_put(rps->clk); +err_alloc: + kfree(rps); + + return ret; +} + +CLOCKSOURCE_OF_DECLARE(ox810se_rps, + "oxsemi,ox810se-rps-timer", oxnas_rps_timer_init); diff --git a/drivers/clocksource/timer-prima2.c b/drivers/clocksource/timer-prima2.c index 2854c663e8b5..c32148ec7a38 100644 --- a/drivers/clocksource/timer-prima2.c +++ b/drivers/clocksource/timer-prima2.c @@ 
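oxnas_rps_timer_init above is also a textbook example of the unwind style the rest of the series moves toward: each acquired resource gets a goto label, and every failure path releases, in reverse order, exactly what was already taken. Reduced to its skeleton; the resource set and names are illustrative:

    #include <linux/clk.h>
    #include <linux/err.h>
    #include <linux/of_address.h>

    /* Sketch: reverse-order unwind with goto labels, as in the probe above. */
    static int __init example_init(struct device_node *np)
    {
            void __iomem *base;
            struct clk *clk;
            int ret;

            clk = of_clk_get(np, 0);
            if (IS_ERR(clk))
                    return PTR_ERR(clk);

            ret = clk_prepare_enable(clk);
            if (ret)
                    goto err_clk;

            base = of_iomap(np, 0);
            if (!base) {
                    ret = -ENXIO;
                    goto err_enable;
            }

            return 0;

    err_enable:
            clk_disable_unprepare(clk);
    err_clk:
            clk_put(clk);
            return ret;
    }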
-19,7 +19,6 @@ #include #include #include -#include #define PRIMA2_CLOCK_FREQ 1000000 @@ -189,24 +188,36 @@ static void __init sirfsoc_clockevent_init(void) } /* initialize the kernel jiffy timer source */ -static void __init sirfsoc_prima2_timer_init(struct device_node *np) +static int __init sirfsoc_prima2_timer_init(struct device_node *np) { unsigned long rate; struct clk *clk; + int ret; clk = of_clk_get(np, 0); - BUG_ON(IS_ERR(clk)); + if (IS_ERR(clk)) { + pr_err("Failed to get clock"); + return PTR_ERR(clk); + } - BUG_ON(clk_prepare_enable(clk)); + ret = clk_prepare_enable(clk); + if (ret) { + pr_err("Failed to enable clock"); + return ret; + } rate = clk_get_rate(clk); - BUG_ON(rate < PRIMA2_CLOCK_FREQ); - BUG_ON(rate % PRIMA2_CLOCK_FREQ); + if (rate < PRIMA2_CLOCK_FREQ || rate % PRIMA2_CLOCK_FREQ) { + pr_err("Invalid clock rate"); + return -EINVAL; + } sirfsoc_timer_base = of_iomap(np, 0); - if (!sirfsoc_timer_base) - panic("unable to map timer cpu registers\n"); + if (!sirfsoc_timer_base) { + pr_err("unable to map timer cpu registers\n"); + return -ENXIO; + } sirfsoc_timer_irq.irq = irq_of_parse_and_map(np, 0); @@ -216,14 +227,23 @@ static void __init sirfsoc_prima2_timer_init(struct device_node *np) writel_relaxed(0, sirfsoc_timer_base + SIRFSOC_TIMER_COUNTER_HI); writel_relaxed(BIT(0), sirfsoc_timer_base + SIRFSOC_TIMER_STATUS); - BUG_ON(clocksource_register_hz(&sirfsoc_clocksource, - PRIMA2_CLOCK_FREQ)); + ret = clocksource_register_hz(&sirfsoc_clocksource, PRIMA2_CLOCK_FREQ); + if (ret) { + pr_err("Failed to register clocksource"); + return ret; + } sched_clock_register(sirfsoc_read_sched_clock, 64, PRIMA2_CLOCK_FREQ); - BUG_ON(setup_irq(sirfsoc_timer_irq.irq, &sirfsoc_timer_irq)); + ret = setup_irq(sirfsoc_timer_irq.irq, &sirfsoc_timer_irq); + if (ret) { + pr_err("Failed to setup irq"); + return ret; + } sirfsoc_clockevent_init(); + + return 0; } CLOCKSOURCE_OF_DECLARE(sirfsoc_prima2_timer, "sirf,prima2-tick", sirfsoc_prima2_timer_init); diff --git a/drivers/clocksource/timer-sp804.c b/drivers/clocksource/timer-sp804.c index 5f45b9adef60..d07863388e05 100644 --- a/drivers/clocksource/timer-sp804.c +++ b/drivers/clocksource/timer-sp804.c @@ -77,7 +77,7 @@ void __init sp804_timer_disable(void __iomem *base) writel(0, base + TIMER_CTRL); } -void __init __sp804_clocksource_and_sched_clock_init(void __iomem *base, +int __init __sp804_clocksource_and_sched_clock_init(void __iomem *base, const char *name, struct clk *clk, int use_sched_clock) @@ -89,14 +89,13 @@ void __init __sp804_clocksource_and_sched_clock_init(void __iomem *base, if (IS_ERR(clk)) { pr_err("sp804: clock not found: %d\n", (int)PTR_ERR(clk)); - return; + return PTR_ERR(clk); } } rate = sp804_get_clock_rate(clk); - if (rate < 0) - return; + return -EINVAL; /* setup timer 0 as free-running clocksource */ writel(0, base + TIMER_CTRL); @@ -112,6 +111,8 @@ void __init __sp804_clocksource_and_sched_clock_init(void __iomem *base, sched_clock_base = base; sched_clock_register(sp804_read, 32, rate); } + + return 0; } @@ -186,7 +187,7 @@ static struct irqaction sp804_timer_irq = { .dev_id = &sp804_clockevent, }; -void __init __sp804_clockevents_init(void __iomem *base, unsigned int irq, struct clk *clk, const char *name) +int __init __sp804_clockevents_init(void __iomem *base, unsigned int irq, struct clk *clk, const char *name) { struct clock_event_device *evt = &sp804_clockevent; long rate; @@ -196,12 +197,12 @@ void __init __sp804_clockevents_init(void __iomem *base, unsigned int irq, struc if (IS_ERR(clk)) { 
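The prima2 path above replaces two BUG_ON()s with one combined sanity check: the input clock must be at least, and an exact multiple of, the 1 MHz base frequency the driver expects. As a standalone predicate; the sample rates are assumed values for illustration:

    #include <stdio.h>

    #define PRIMA2_CLOCK_FREQ 1000000UL     /* required 1 MHz base */

    static int rate_ok(unsigned long rate)
    {
            return rate >= PRIMA2_CLOCK_FREQ && (rate % PRIMA2_CLOCK_FREQ) == 0;
    }

    int main(void)
    {
            /* prints: 1 0 0 */
            printf("%d %d %d\n",
                   rate_ok(150000000),      /* 150 MHz: accepted */
                   rate_ok(999999),         /* below 1 MHz: rejected */
                   rate_ok(150000001));     /* not a multiple: rejected */
            return 0;
    }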
pr_err("sp804: %s clock not found: %d\n", name, (int)PTR_ERR(clk)); - return; + return PTR_ERR(clk); } rate = sp804_get_clock_rate(clk); if (rate < 0) - return; + return -EINVAL; clkevt_base = base; clkevt_reload = DIV_ROUND_CLOSEST(rate, HZ); @@ -213,27 +214,31 @@ void __init __sp804_clockevents_init(void __iomem *base, unsigned int irq, struc setup_irq(irq, &sp804_timer_irq); clockevents_config_and_register(evt, rate, 0xf, 0xffffffff); + + return 0; } -static void __init sp804_of_init(struct device_node *np) +static int __init sp804_of_init(struct device_node *np) { static bool initialized = false; void __iomem *base; - int irq; + int irq, ret = -EINVAL; u32 irq_num = 0; struct clk *clk1, *clk2; const char *name = of_get_property(np, "compatible", NULL); base = of_iomap(np, 0); - if (WARN_ON(!base)) - return; + if (!base) + return -ENXIO; /* Ensure timers are disabled */ writel(0, base + TIMER_CTRL); writel(0, base + TIMER_2_BASE + TIMER_CTRL); - if (initialized || !of_device_is_available(np)) + if (initialized || !of_device_is_available(np)) { + ret = -EINVAL; goto err; + } clk1 = of_clk_get(np, 0); if (IS_ERR(clk1)) @@ -256,35 +261,53 @@ static void __init sp804_of_init(struct device_node *np) of_property_read_u32(np, "arm,sp804-has-irq", &irq_num); if (irq_num == 2) { - __sp804_clockevents_init(base + TIMER_2_BASE, irq, clk2, name); - __sp804_clocksource_and_sched_clock_init(base, name, clk1, 1); + + ret = __sp804_clockevents_init(base + TIMER_2_BASE, irq, clk2, name); + if (ret) + goto err; + + ret = __sp804_clocksource_and_sched_clock_init(base, name, clk1, 1); + if (ret) + goto err; } else { - __sp804_clockevents_init(base, irq, clk1 , name); - __sp804_clocksource_and_sched_clock_init(base + TIMER_2_BASE, - name, clk2, 1); + + ret = __sp804_clockevents_init(base, irq, clk1 , name); + if (ret) + goto err; + + ret =__sp804_clocksource_and_sched_clock_init(base + TIMER_2_BASE, + name, clk2, 1); + if (ret) + goto err; } initialized = true; - return; + return 0; err: iounmap(base); + return ret; } CLOCKSOURCE_OF_DECLARE(sp804, "arm,sp804", sp804_of_init); -static void __init integrator_cp_of_init(struct device_node *np) +static int __init integrator_cp_of_init(struct device_node *np) { static int init_count = 0; void __iomem *base; - int irq; + int irq, ret = -EINVAL; const char *name = of_get_property(np, "compatible", NULL); struct clk *clk; base = of_iomap(np, 0); - if (WARN_ON(!base)) - return; + if (!base) { + pr_err("Failed to iomap"); + return -ENXIO; + } + clk = of_clk_get(np, 0); - if (WARN_ON(IS_ERR(clk))) - return; + if (IS_ERR(clk)) { + pr_err("Failed to get clock"); + return PTR_ERR(clk); + } /* Ensure timer is disabled */ writel(0, base + TIMER_CTRL); @@ -292,19 +315,24 @@ static void __init integrator_cp_of_init(struct device_node *np) if (init_count == 2 || !of_device_is_available(np)) goto err; - if (!init_count) - __sp804_clocksource_and_sched_clock_init(base, name, clk, 0); - else { + if (!init_count) { + ret = __sp804_clocksource_and_sched_clock_init(base, name, clk, 0); + if (ret) + goto err; + } else { irq = irq_of_parse_and_map(np, 0); if (irq <= 0) goto err; - __sp804_clockevents_init(base, irq, clk, name); + ret = __sp804_clockevents_init(base, irq, clk, name); + if (ret) + goto err; } init_count++; - return; + return 0; err: iounmap(base); + return ret; } CLOCKSOURCE_OF_DECLARE(intcp, "arm,integrator-cp-timer", integrator_cp_of_init); diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index f3dcb76799b4..1b2574c4fb97 100644 --- 
a/drivers/clocksource/timer-stm32.c
+++ b/drivers/clocksource/timer-stm32.c
@@ -98,7 +98,7 @@ static struct stm32_clock_event_ddata clock_event_ddata = {
 	},
 };
 
-static void __init stm32_clockevent_init(struct device_node *np)
+static int __init stm32_clockevent_init(struct device_node *np)
 {
 	struct stm32_clock_event_ddata *data = &clock_event_ddata;
 	struct clk *clk;
@@ -130,12 +130,14 @@ static void __init stm32_clockevent_init(struct device_node *np)
 
 	data->base = of_iomap(np, 0);
 	if (!data->base) {
+		ret = -ENXIO;
 		pr_err("failed to map registers for clockevent\n");
 		goto err_iomap;
 	}
 
 	irq = irq_of_parse_and_map(np, 0);
 	if (!irq) {
+		ret = -EINVAL;
 		pr_err("%s: failed to get irq.\n", np->full_name);
 		goto err_get_irq;
 	}
@@ -173,7 +175,7 @@ static void __init stm32_clockevent_init(struct device_node *np)
 	pr_info("%s: STM32 clockevent driver initialized (%d bits)\n",
 		np->full_name, bits);
 
-	return;
+	return ret;
 
 err_get_irq:
 	iounmap(data->base);
@@ -182,7 +184,7 @@ err_iomap:
 err_clk_enable:
 	clk_put(clk);
 err_clk_get:
-	return;
+	return ret;
 }
 
 CLOCKSOURCE_OF_DECLARE(stm32, "st,stm32-timer", stm32_clockevent_init);
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 24c83f9efd87..c184eb84101e 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -311,33 +311,42 @@ err_free:
 	return ret;
 }
 
-static void __init sun5i_timer_init(struct device_node *node)
+static int __init sun5i_timer_init(struct device_node *node)
 {
 	struct reset_control *rstc;
 	void __iomem *timer_base;
 	struct clk *clk;
-	int irq;
+	int irq, ret;
 
 	timer_base = of_io_request_and_map(node, 0, of_node_full_name(node));
-	if (IS_ERR(timer_base))
-		panic("Can't map registers");
+	if (IS_ERR(timer_base)) {
+		pr_err("Can't map registers");
+		return PTR_ERR(timer_base);
+	}
 
 	irq = irq_of_parse_and_map(node, 0);
-	if (irq <= 0)
-		panic("Can't parse IRQ");
+	if (irq <= 0) {
+		pr_err("Can't parse IRQ");
+		return -EINVAL;
+	}
 
 	clk = of_clk_get(node, 0);
-	if (IS_ERR(clk))
-		panic("Can't get timer clock");
+	if (IS_ERR(clk)) {
+		pr_err("Can't get timer clock");
+		return PTR_ERR(clk);
+	}
 
 	rstc = of_reset_control_get(node, NULL);
 	if (!IS_ERR(rstc))
 		reset_control_deassert(rstc);
 
-	sun5i_setup_clocksource(node, timer_base, clk, irq);
-	sun5i_setup_clockevent(node, timer_base, clk, irq);
+	ret = sun5i_setup_clocksource(node, timer_base, clk, irq);
+	if (ret)
+		return ret;
+
+	return sun5i_setup_clockevent(node, timer_base, clk, irq);
 }
 CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer",
-		       sun5i_timer_init);
+		       sun5i_timer_init);
 CLOCKSOURCE_OF_DECLARE(sun7i_a20, "allwinner,sun7i-a20-hstimer",
-		       sun5i_timer_init);
+		       sun5i_timer_init);
diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c
index 8518d9dfba5c..92b7e390f6c8 100644
--- a/drivers/clocksource/timer-ti-32k.c
+++ b/drivers/clocksource/timer-ti-32k.c
@@ -88,14 +88,14 @@ static u64 notrace omap_32k_read_sched_clock(void)
 	return ti_32k_read_cycles(&ti_32k_timer.cs);
 }
 
-static void __init ti_32k_timer_init(struct device_node *np)
+static int __init ti_32k_timer_init(struct device_node *np)
 {
 	int ret;
 
 	ti_32k_timer.base = of_iomap(np, 0);
 	if (!ti_32k_timer.base) {
 		pr_err("Can't ioremap 32k timer base\n");
-		return;
+		return -ENXIO;
 	}
 
 	ti_32k_timer.counter = ti_32k_timer.base;
@@ -116,11 +116,13 @@ static void __init ti_32k_timer_init(struct device_node *np)
 	ret = clocksource_register_hz(&ti_32k_timer.cs, 32768);
 	if (ret) {
 		pr_err("32k_counter: can't register clocksource\n");
-		return;
+
return ret; } sched_clock_register(omap_32k_read_sched_clock, 32, 32768); pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n"); + + return 0; } CLOCKSOURCE_OF_DECLARE(ti_32k_timer, "ti,omap-counter32k", ti_32k_timer_init); diff --git a/drivers/clocksource/timer-u300.c b/drivers/clocksource/timer-u300.c index 1744b243898a..704e40c6f151 100644 --- a/drivers/clocksource/timer-u300.c +++ b/drivers/clocksource/timer-u300.c @@ -359,27 +359,37 @@ static struct delay_timer u300_delay_timer; /* * This sets up the system timers, clock source and clock event. */ -static void __init u300_timer_init_of(struct device_node *np) +static int __init u300_timer_init_of(struct device_node *np) { unsigned int irq; struct clk *clk; unsigned long rate; + int ret; u300_timer_base = of_iomap(np, 0); - if (!u300_timer_base) - panic("could not ioremap system timer\n"); + if (!u300_timer_base) { + pr_err("could not ioremap system timer\n"); + return -ENXIO; + } /* Get the IRQ for the GP1 timer */ irq = irq_of_parse_and_map(np, 2); - if (!irq) - panic("no IRQ for system timer\n"); + if (!irq) { + pr_err("no IRQ for system timer\n"); + return -EINVAL; + } pr_info("U300 GP1 timer @ base: %p, IRQ: %u\n", u300_timer_base, irq); /* Clock the interrupt controller */ clk = of_clk_get(np, 0); - BUG_ON(IS_ERR(clk)); - clk_prepare_enable(clk); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + ret = clk_prepare_enable(clk); + if (ret) + return ret; + rate = clk_get_rate(clk); u300_clockevent_data.ticks_per_jiffy = DIV_ROUND_CLOSEST(rate, HZ); @@ -410,7 +420,9 @@ static void __init u300_timer_init_of(struct device_node *np) u300_timer_base + U300_TIMER_APP_RGPT1); /* Set up the IRQ handler */ - setup_irq(irq, &u300_timer_irq); + ret = setup_irq(irq, &u300_timer_irq); + if (ret) + return ret; /* Reset the General Purpose timer 2 */ writel(U300_TIMER_APP_RGPT2_TIMER_RESET, @@ -428,9 +440,12 @@ static void __init u300_timer_init_of(struct device_node *np) u300_timer_base + U300_TIMER_APP_EGPT2); /* Use general purpose timer 2 as clock source */ - if (clocksource_mmio_init(u300_timer_base + U300_TIMER_APP_GPT2CC, - "GPT2", rate, 300, 32, clocksource_mmio_readl_up)) + ret = clocksource_mmio_init(u300_timer_base + U300_TIMER_APP_GPT2CC, + "GPT2", rate, 300, 32, clocksource_mmio_readl_up); + if (ret) { pr_err("timer: failed to initialize U300 clock source\n"); + return ret; + } /* Configure and register the clockevent */ clockevents_config_and_register(&u300_clockevent_data.cevd, rate, @@ -440,6 +455,7 @@ static void __init u300_timer_init_of(struct device_node *np) * TODO: init and register the rest of the timers too, they can be * used by hrtimers! 
*/ + return 0; } CLOCKSOURCE_OF_DECLARE(u300_timer, "stericsson,u300-apptimer", diff --git a/drivers/clocksource/versatile.c b/drivers/clocksource/versatile.c index 0a26d3dde6c0..220b490a8142 100644 --- a/drivers/clocksource/versatile.c +++ b/drivers/clocksource/versatile.c @@ -25,16 +25,18 @@ static u64 notrace versatile_sys_24mhz_read(void) return readl(versatile_sys_24mhz); } -static void __init versatile_sched_clock_init(struct device_node *node) +static int __init versatile_sched_clock_init(struct device_node *node) { void __iomem *base = of_iomap(node, 0); if (!base) - return; + return -ENXIO; versatile_sys_24mhz = base + SYS_24MHZ; sched_clock_register(versatile_sys_24mhz_read, 32, 24000000); + + return 0; } CLOCKSOURCE_OF_DECLARE(vexpress, "arm,vexpress-sysreg", versatile_sched_clock_init); diff --git a/drivers/clocksource/vf_pit_timer.c b/drivers/clocksource/vf_pit_timer.c index a0e6c68536a1..55d8d8402d90 100644 --- a/drivers/clocksource/vf_pit_timer.c +++ b/drivers/clocksource/vf_pit_timer.c @@ -156,15 +156,18 @@ static int __init pit_clockevent_init(unsigned long rate, int irq) return 0; } -static void __init pit_timer_init(struct device_node *np) +static int __init pit_timer_init(struct device_node *np) { struct clk *pit_clk; void __iomem *timer_base; unsigned long clk_rate; - int irq; + int irq, ret; timer_base = of_iomap(np, 0); - BUG_ON(!timer_base); + if (!timer_base) { + pr_err("Failed to iomap"); + return -ENXIO; + } /* * PIT0 and PIT1 can be chained to build a 64-bit timer, @@ -175,12 +178,16 @@ static void __init pit_timer_init(struct device_node *np) clkevt_base = timer_base + PITn_OFFSET(3); irq = irq_of_parse_and_map(np, 0); - BUG_ON(irq <= 0); + if (irq <= 0) + return -EINVAL; pit_clk = of_clk_get(np, 0); - BUG_ON(IS_ERR(pit_clk)); + if (IS_ERR(pit_clk)) + return PTR_ERR(pit_clk); - BUG_ON(clk_prepare_enable(pit_clk)); + ret = clk_prepare_enable(pit_clk); + if (ret) + return ret; clk_rate = clk_get_rate(pit_clk); cycle_per_jiffy = clk_rate / (HZ); @@ -188,8 +195,10 @@ static void __init pit_timer_init(struct device_node *np) /* enable the pit module */ __raw_writel(~PITMCR_MDIS, timer_base + PITMCR); - BUG_ON(pit_clocksource_init(clk_rate)); + ret = pit_clocksource_init(clk_rate); + if (ret) + return ret; - pit_clockevent_init(clk_rate, irq); + return pit_clockevent_init(clk_rate, irq); } CLOCKSOURCE_OF_DECLARE(vf610, "fsl,vf610-pit", pit_timer_init); diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c index ddb409274f45..b15069483fbd 100644 --- a/drivers/clocksource/vt8500_timer.c +++ b/drivers/clocksource/vt8500_timer.c @@ -121,38 +121,48 @@ static struct irqaction irq = { .dev_id = &clockevent, }; -static void __init vt8500_timer_init(struct device_node *np) +static int __init vt8500_timer_init(struct device_node *np) { - int timer_irq; + int timer_irq, ret; regbase = of_iomap(np, 0); if (!regbase) { pr_err("%s: Missing iobase description in Device Tree\n", __func__); - return; + return -ENXIO; } + timer_irq = irq_of_parse_and_map(np, 0); if (!timer_irq) { pr_err("%s: Missing irq description in Device Tree\n", __func__); - return; + return -EINVAL; } writel(1, regbase + TIMER_CTRL_VAL); writel(0xf, regbase + TIMER_STATUS_VAL); writel(~0, regbase + TIMER_MATCH_VAL); - if (clocksource_register_hz(&clocksource, VT8500_TIMER_HZ)) + ret = clocksource_register_hz(&clocksource, VT8500_TIMER_HZ); + if (ret) { pr_err("%s: vt8500_timer_init: clocksource_register failed for %s\n", - __func__, clocksource.name); + __func__, clocksource.name); 
+ return ret; + } clockevent.cpumask = cpumask_of(0); - if (setup_irq(timer_irq, &irq)) + ret = setup_irq(timer_irq, &irq); + if (ret) { pr_err("%s: setup_irq failed for %s\n", __func__, clockevent.name); + return ret; + } + clockevents_config_and_register(&clockevent, VT8500_TIMER_HZ, MIN_OSCR_DELTA * 2, 0xf0000000); + + return 0; } CLOCKSOURCE_OF_DECLARE(vt8500, "via,vt8500-timer", vt8500_timer_init); diff --git a/drivers/clocksource/zevio-timer.c b/drivers/clocksource/zevio-timer.c index ceaa6133f9c2..9a53f5ef6157 100644 --- a/drivers/clocksource/zevio-timer.c +++ b/drivers/clocksource/zevio-timer.c @@ -210,9 +210,9 @@ error_free: return ret; } -static void __init zevio_timer_init(struct device_node *node) +static int __init zevio_timer_init(struct device_node *node) { - BUG_ON(zevio_timer_add(node)); + return zevio_timer_add(node); } CLOCKSOURCE_OF_DECLARE(zevio_timer, "lsi,zevio-timer", zevio_timer_init); diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 15d06fcf0b50..a782ce87715c 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -22,7 +22,6 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include #include #include #include @@ -56,11 +55,21 @@ static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; /* proc_event_counts is used as the sequence number of the netlink message */ static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 }; -static inline void get_seq(__u32 *ts, int *cpu) +static inline void send_msg(struct cn_msg *msg) { preempt_disable(); - *ts = __this_cpu_inc_return(proc_event_counts) - 1; - *cpu = smp_processor_id(); + + msg->seq = __this_cpu_inc_return(proc_event_counts) - 1; + ((struct proc_event *)msg->data)->cpu = smp_processor_id(); + + /* + * Preemption remains disabled during send to ensure the messages are + * ordered according to their sequence numbers. + * + * If cn_netlink_send() fails, the data is not sent. 
+ */ + cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT); + preempt_enable(); } @@ -77,7 +86,6 @@ void proc_fork_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_FORK; rcu_read_lock(); @@ -92,8 +100,7 @@ void proc_fork_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - /* If cn_netlink_send() failed, the data is not sent */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_exec_connector(struct task_struct *task) @@ -108,7 +115,6 @@ void proc_exec_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_EXEC; ev->event_data.exec.process_pid = task->pid; @@ -118,7 +124,7 @@ void proc_exec_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_id_connector(struct task_struct *task, int which_id) @@ -150,14 +156,13 @@ void proc_id_connector(struct task_struct *task, int which_id) return; } rcu_read_unlock(); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_sid_connector(struct task_struct *task) @@ -172,7 +177,6 @@ void proc_sid_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_SID; ev->event_data.sid.process_pid = task->pid; @@ -182,7 +186,7 @@ void proc_sid_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_ptrace_connector(struct task_struct *task, int ptrace_id) @@ -197,7 +201,6 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_PTRACE; ev->event_data.ptrace.process_pid = task->pid; @@ -215,7 +218,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_comm_connector(struct task_struct *task) @@ -230,7 +233,6 @@ void proc_comm_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_COMM; ev->event_data.comm.process_pid = task->pid; @@ -241,7 +243,7 @@ void proc_comm_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 
0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_coredump_connector(struct task_struct *task) @@ -256,7 +258,6 @@ void proc_coredump_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_COREDUMP; ev->event_data.coredump.process_pid = task->pid; @@ -266,7 +267,7 @@ void proc_coredump_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_exit_connector(struct task_struct *task) @@ -281,7 +282,6 @@ void proc_exit_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_EXIT; ev->event_data.exit.process_pid = task->pid; @@ -293,7 +293,7 @@ void proc_exit_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } /* @@ -325,7 +325,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) msg->ack = rcvd_ack + 1; msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } /** @@ -389,5 +389,4 @@ static int __init cn_proc_init(void) } return 0; } - -module_init(cn_proc_init); +device_initcall(cn_proc_init); diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index b7445b6ae5a4..c822d72629d5 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -31,23 +31,18 @@ config CPU_FREQ_BOOST_SW depends on THERMAL config CPU_FREQ_STAT - tristate "CPU frequency translation statistics" + bool "CPU frequency transition statistics" default y help - This driver exports CPU frequency statistics information through sysfs - file system. - - To compile this driver as a module, choose M here: the - module will be called cpufreq_stats. + Export CPU frequency statistics information through sysfs. If in doubt, say N. config CPU_FREQ_STAT_DETAILS - bool "CPU frequency translation statistics details" + bool "CPU frequency transition statistics details" depends on CPU_FREQ_STAT help - This will show detail CPU frequency translation table in sysfs file - system. + Show detailed CPU frequency transition table in sysfs. If in doubt, say N. diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 32a15052f363..297e9128fe9f 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -468,20 +468,17 @@ unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy, struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; struct cpufreq_frequency_table *entry; - unsigned int next_perf_state, next_freq, freq; + unsigned int next_perf_state, next_freq, index; /* * Find the closest frequency above target_freq. - * - * The table is sorted in the reverse order with respect to the - * frequency and all of the entries are valid (see the initialization). 
*/ - entry = policy->freq_table; - do { - entry++; - freq = entry->frequency; - } while (freq >= target_freq && freq != CPUFREQ_TABLE_END); - entry--; + if (policy->cached_target_freq == target_freq) + index = policy->cached_resolved_idx; + else + index = cpufreq_table_find_index_dl(policy, target_freq); + + entry = &policy->freq_table[index]; next_freq = entry->frequency; next_perf_state = entry->driver_data; diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 404360cad25c..042023bbbf62 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -48,9 +48,8 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *od_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = od_data->tuners; - struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs); - if (!od_info->freq_table) + if (!policy->freq_table) return freq_next; rdmsr_on_cpu(policy->cpu, MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, @@ -92,10 +91,9 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, else { unsigned int index; - cpufreq_frequency_table_target(policy, - od_info->freq_table, policy->cur - 1, - CPUFREQ_RELATION_H, &index); - freq_next = od_info->freq_table[index].frequency; + index = cpufreq_table_find_index_h(policy, + policy->cur - 1); + freq_next = policy->freq_table[index].frequency; } data->freq_prev = freq_next; diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 3646b143bbf5..0bb44d5b5df4 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -79,15 +79,16 @@ static const struct of_device_id machines[] __initconst = { static int __init cpufreq_dt_platdev_init(void) { struct device_node *np = of_find_node_by_path("/"); + const struct of_device_id *match; if (!np) return -ENODEV; - if (!of_match_node(machines, np)) + match = of_match_node(machines, np); + of_node_put(np); + if (!match) return -ENODEV; - of_node_put(of_root); - return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1, NULL, 0)); } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9009295f5134..3dd4884c6f9e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -74,19 +74,12 @@ static inline bool has_target(void) } /* internal prototypes */ -static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static unsigned int __cpufreq_get(struct cpufreq_policy *policy); +static int cpufreq_init_governor(struct cpufreq_policy *policy); +static void cpufreq_exit_governor(struct cpufreq_policy *policy); static int cpufreq_start_governor(struct cpufreq_policy *policy); - -static inline void cpufreq_exit_governor(struct cpufreq_policy *policy) -{ - (void)cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); -} - -static inline void cpufreq_stop_governor(struct cpufreq_policy *policy) -{ - (void)cpufreq_governor(policy, CPUFREQ_GOV_STOP); -} +static void cpufreq_stop_governor(struct cpufreq_policy *policy); +static void cpufreq_governor_limits(struct cpufreq_policy *policy); /** * Two notifier lists: the "policy" list is involved in the @@ -133,15 +126,6 @@ struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(get_governor_parent_kobj); -struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu) -{ - struct cpufreq_policy *policy = 
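The cpufreq-dt-platdev hunk above is a device-node refcount fix: the old code leaked the reference taken on the root node whenever no machine matched, and dropped it through the unrelated-looking of_root pointer otherwise. The corrected idiom is to put exactly the node that was looked up, once the match result is in hand. In isolation; the table and names here are illustrative:

    #include <linux/errno.h>
    #include <linux/mod_devicetable.h>
    #include <linux/of.h>

    /* Sketch: balanced of_node refcounting around a match table. */
    static const struct of_device_id example_machines[] = {
            { .compatible = "vendor,example-board" },
            { }
    };

    static int __init example_platdev_init(void)
    {
            struct device_node *np = of_find_node_by_path("/");
            const struct of_device_id *match;

            if (!np)
                    return -ENODEV;

            match = of_match_node(example_machines, np);
            of_node_put(np);        /* put the node we looked up, exactly once */
            if (!match)
                    return -ENODEV;

            return 0;
    }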
per_cpu(cpufreq_cpu_data, cpu); - - return policy && !policy_is_inactive(policy) ? - policy->freq_table : NULL; -} -EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table); - static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) { u64 idle_time; @@ -354,6 +338,7 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy, pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new, (unsigned long)freqs->cpu); trace_cpu_frequency(freqs->new, freqs->cpu); + cpufreq_stats_record_transition(policy, freqs->new); srcu_notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_POSTCHANGE, freqs); if (likely(policy) && likely(policy->cpu == freqs->cpu)) @@ -507,6 +492,38 @@ void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); +/** + * cpufreq_driver_resolve_freq - Map a target frequency to a driver-supported + * one. + * @target_freq: target frequency to resolve. + * + * The target to driver frequency mapping is cached in the policy. + * + * Return: Lowest driver-supported frequency greater than or equal to the + * given target_freq, subject to policy (min/max) and driver limitations. + */ +unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + policy->cached_target_freq = target_freq; + + if (cpufreq_driver->target_index) { + int idx; + + idx = cpufreq_frequency_table_target(policy, target_freq, + CPUFREQ_RELATION_L); + policy->cached_resolved_idx = idx; + return policy->freq_table[idx].frequency; + } + + if (cpufreq_driver->resolve_freq) + return cpufreq_driver->resolve_freq(policy, target_freq); + + return target_freq; +} +EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); + /********************************************************************* * SYSFS INTERFACE * *********************************************************************/ @@ -1115,6 +1132,7 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy, bool notify) CPUFREQ_REMOVE_POLICY, policy); down_write(&policy->rwsem); + cpufreq_stats_free_table(policy); cpufreq_remove_dev_symlink(policy); kobj = &policy->kobj; cmp = &policy->kobj_unregister; @@ -1265,13 +1283,12 @@ static int cpufreq_online(unsigned int cpu) } } - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_START, policy); - if (new_policy) { ret = cpufreq_add_dev_interface(policy); if (ret) goto out_exit_policy; + + cpufreq_stats_create_table(policy); blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); @@ -1280,6 +1297,9 @@ static int cpufreq_online(unsigned int cpu) write_unlock_irqrestore(&cpufreq_driver_lock, flags); } + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_START, policy); + ret = cpufreq_init_policy(policy); if (ret) { pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n", @@ -1556,9 +1576,6 @@ static unsigned int cpufreq_update_current_freq(struct cpufreq_policy *policy) { unsigned int new_freq; - if (cpufreq_suspended) - return 0; - new_freq = cpufreq_driver->get(policy->cpu); if (!new_freq) return 0; @@ -1864,14 +1881,17 @@ static int __target_intermediate(struct cpufreq_policy *policy, return ret; } -static int __target_index(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *freq_table, int index) +static int __target_index(struct cpufreq_policy *policy, int index) { struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0}; unsigned int 
intermediate_freq = 0; + unsigned int newfreq = policy->freq_table[index].frequency; int retval = -EINVAL; bool notify; + if (newfreq == policy->cur) + return 0; + notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION); if (notify) { /* Handle switching to intermediate frequency */ @@ -1886,7 +1906,7 @@ static int __target_index(struct cpufreq_policy *policy, freqs.old = freqs.new; } - freqs.new = freq_table[index].frequency; + freqs.new = newfreq; pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n", __func__, policy->cpu, freqs.old, freqs.new); @@ -1923,17 +1943,13 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation) { unsigned int old_target_freq = target_freq; - struct cpufreq_frequency_table *freq_table; - int index, retval; + int index; if (cpufreq_disabled()) return -ENODEV; /* Make sure that target_freq is within supported range */ - if (target_freq > policy->max) - target_freq = policy->max; - if (target_freq < policy->min) - target_freq = policy->min; + target_freq = clamp_val(target_freq, policy->min, policy->max); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); @@ -1956,23 +1972,9 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (!cpufreq_driver->target_index) return -EINVAL; - freq_table = cpufreq_frequency_get_table(policy->cpu); - if (unlikely(!freq_table)) { - pr_err("%s: Unable to find freq_table\n", __func__); - return -EINVAL; - } - - retval = cpufreq_frequency_table_target(policy, freq_table, target_freq, - relation, &index); - if (unlikely(retval)) { - pr_err("%s: Unable to find matching freq\n", __func__); - return retval; - } - - if (freq_table[index].frequency == policy->cur) - return 0; + index = cpufreq_frequency_table_target(policy, target_freq, relation); - return __target_index(policy, freq_table, index); + return __target_index(policy, index); } EXPORT_SYMBOL_GPL(__cpufreq_driver_target); @@ -1997,7 +1999,7 @@ __weak struct cpufreq_governor *cpufreq_fallback_governor(void) return NULL; } -static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) +static int cpufreq_init_governor(struct cpufreq_policy *policy) { int ret; @@ -2025,36 +2027,82 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) } } - if (event == CPUFREQ_GOV_POLICY_INIT) - if (!try_module_get(policy->governor->owner)) - return -EINVAL; - - pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); + if (!try_module_get(policy->governor->owner)) + return -EINVAL; - ret = policy->governor->governor(policy, event); + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); - if (event == CPUFREQ_GOV_POLICY_INIT) { - if (ret) + if (policy->governor->init) { + ret = policy->governor->init(policy); + if (ret) { module_put(policy->governor->owner); - else - policy->governor->initialized++; - } else if (event == CPUFREQ_GOV_POLICY_EXIT) { - policy->governor->initialized--; - module_put(policy->governor->owner); + return ret; + } } - return ret; + return 0; +} + +static void cpufreq_exit_governor(struct cpufreq_policy *policy) +{ + if (cpufreq_suspended || !policy->governor) + return; + + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); + + if (policy->governor->exit) + policy->governor->exit(policy); + + module_put(policy->governor->owner); } static int cpufreq_start_governor(struct cpufreq_policy *policy) { int ret; + if (cpufreq_suspended) + return 0; + + if (!policy->governor) + return -EINVAL; + + 
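cpufreq_driver_resolve_freq(), added earlier in this file, is a memoized lookup: it clamps the request into [policy->min, policy->max], records the clamped target in cached_target_freq and the table index in cached_resolved_idx, so a fast-switch driver (see the acpi-cpufreq hunk above) can skip the table walk when asked for the same frequency twice in a row. A standalone model of that cache behaviour; pure C, the table values are assumed:

    #include <stdio.h>

    /* Model: lowest table entry >= target (CPUFREQ_RELATION_L), with the
     * clamped target and resolved index remembered between calls. */
    static unsigned int table[] = { 800000, 1200000, 1800000, 2400000 }; /* kHz */
    static unsigned int cached_target, cached_idx = (unsigned int)-1;

    static unsigned int resolve(unsigned int target, unsigned int min,
                                unsigned int max)
    {
            unsigned int i;

            target = target < min ? min : (target > max ? max : target); /* clamp_val */
            if (target == cached_target && cached_idx != (unsigned int)-1)
                    return table[cached_idx];       /* cache hit, no walk */

            for (i = 0; i < 3 && table[i] < target; i++)
                    ;       /* walk up; i stops at the last index (3) at most */
            cached_target = target;
            cached_idx = i;
            return table[i];
    }

    int main(void)
    {
            printf("%u\n", resolve(1000000, 800000, 2400000)); /* 1200000 */
            printf("%u\n", resolve(1000000, 800000, 2400000)); /* cached hit */
            return 0;
    }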
pr_debug("%s: for CPU %u\n", __func__, policy->cpu); + if (cpufreq_driver->get && !cpufreq_driver->setpolicy) cpufreq_update_current_freq(policy); - ret = cpufreq_governor(policy, CPUFREQ_GOV_START); - return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + if (policy->governor->start) { + ret = policy->governor->start(policy); + if (ret) + return ret; + } + + if (policy->governor->limits) + policy->governor->limits(policy); + + return 0; +} + +static void cpufreq_stop_governor(struct cpufreq_policy *policy) +{ + if (cpufreq_suspended || !policy->governor) + return; + + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); + + if (policy->governor->stop) + policy->governor->stop(policy); +} + +static void cpufreq_governor_limits(struct cpufreq_policy *policy) +{ + if (cpufreq_suspended || !policy->governor) + return; + + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); + + if (policy->governor->limits) + policy->governor->limits(policy); } int cpufreq_register_governor(struct cpufreq_governor *governor) @@ -2069,7 +2117,6 @@ int cpufreq_register_governor(struct cpufreq_governor *governor) mutex_lock(&cpufreq_governor_mutex); - governor->initialized = 0; err = -EBUSY; if (!find_governor(governor->name)) { err = 0; @@ -2184,6 +2231,8 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, policy->min = new_policy->min; policy->max = new_policy->max; + policy->cached_target_freq = UINT_MAX; + pr_debug("new min and max freqs are %u - %u kHz\n", policy->min, policy->max); @@ -2195,7 +2244,8 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, if (new_policy->governor == policy->governor) { pr_debug("cpufreq: governor limits update\n"); - return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + cpufreq_governor_limits(policy); + return 0; } pr_debug("governor switch\n"); @@ -2210,7 +2260,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, /* start new governor */ policy->governor = new_policy->governor; - ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); + ret = cpufreq_init_governor(policy); if (!ret) { ret = cpufreq_start_governor(policy); if (!ret) { @@ -2224,7 +2274,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, pr_debug("starting governor %s failed\n", policy->governor->name); if (old_gov) { policy->governor = old_gov; - if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) + if (cpufreq_init_governor(policy)) policy->governor = NULL; else cpufreq_start_governor(policy); @@ -2261,6 +2311,10 @@ int cpufreq_update_policy(unsigned int cpu) * -> ask driver for current freq and notify governors about a change */ if (cpufreq_driver->get && !cpufreq_driver->setpolicy) { + if (cpufreq_suspended) { + ret = -EAGAIN; + goto unlock; + } new_policy.cur = cpufreq_update_current_freq(policy); if (WARN_ON(!new_policy.cur)) { ret = -EIO; @@ -2305,26 +2359,25 @@ static struct notifier_block __refdata cpufreq_cpu_notifier = { *********************************************************************/ static int cpufreq_boost_set_sw(int state) { - struct cpufreq_frequency_table *freq_table; struct cpufreq_policy *policy; int ret = -EINVAL; for_each_active_policy(policy) { - freq_table = cpufreq_frequency_get_table(policy->cpu); - if (freq_table) { - ret = cpufreq_frequency_table_cpuinfo(policy, - freq_table); - if (ret) { - pr_err("%s: Policy frequency update failed\n", - __func__); - break; - } - - down_write(&policy->rwsem); - policy->user_policy.max = policy->max; - cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); - up_write(&policy->rwsem); + 
if (!policy->freq_table) + continue; + + ret = cpufreq_frequency_table_cpuinfo(policy, + policy->freq_table); + if (ret) { + pr_err("%s: Policy frequency update failed\n", + __func__); + break; } + + down_write(&policy->rwsem); + policy->user_policy.max = policy->max; + cpufreq_governor_limits(policy); + up_write(&policy->rwsem); } return ret; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 316df247e00d..18da4f8051d3 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -17,7 +17,6 @@ struct cs_policy_dbs_info { struct policy_dbs_info policy_dbs; unsigned int down_skip; - unsigned int requested_freq; }; static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) @@ -75,19 +74,17 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) /* Check for frequency increase */ if (load > dbs_data->up_threshold) { + unsigned int requested_freq = policy->cur; + dbs_info->down_skip = 0; /* if we are already at full speed then break out early */ - if (dbs_info->requested_freq == policy->max) + if (requested_freq == policy->max) goto out; - dbs_info->requested_freq += get_freq_target(cs_tuners, policy); - - if (dbs_info->requested_freq > policy->max) - dbs_info->requested_freq = policy->max; + requested_freq += get_freq_target(cs_tuners, policy); - __cpufreq_driver_target(policy, dbs_info->requested_freq, - CPUFREQ_RELATION_H); + __cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_H); goto out; } @@ -98,36 +95,27 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) /* Check for frequency decrease */ if (load < cs_tuners->down_threshold) { - unsigned int freq_target; + unsigned int freq_target, requested_freq = policy->cur; /* * if we cannot reduce the frequency anymore, break out early */ - if (policy->cur == policy->min) + if (requested_freq == policy->min) goto out; freq_target = get_freq_target(cs_tuners, policy); - if (dbs_info->requested_freq > freq_target) - dbs_info->requested_freq -= freq_target; + if (requested_freq > freq_target) + requested_freq -= freq_target; else - dbs_info->requested_freq = policy->min; + requested_freq = policy->min; - __cpufreq_driver_target(policy, dbs_info->requested_freq, - CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_L); } out: return dbs_data->sampling_rate; } -static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data); - -static struct notifier_block cs_cpufreq_notifier_block = { - .notifier_call = dbs_cpufreq_notifier, -}; - /************************** sysfs interface ************************/ -static struct dbs_governor cs_dbs_gov; static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, const char *buf, size_t count) @@ -268,15 +256,13 @@ static void cs_free(struct policy_dbs_info *policy_dbs) kfree(to_dbs_info(policy_dbs)); } -static int cs_init(struct dbs_data *dbs_data, bool notify) +static int cs_init(struct dbs_data *dbs_data) { struct cs_dbs_tuners *tuners; tuners = kzalloc(sizeof(*tuners), GFP_KERNEL); - if (!tuners) { - pr_err("%s: kzalloc failed\n", __func__); + if (!tuners) return -ENOMEM; - } tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; tuners->freq_step = DEF_FREQUENCY_STEP; @@ -288,19 +274,11 @@ static int cs_init(struct dbs_data *dbs_data, bool notify) dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); - if (notify) - 
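/*
 * Editorial note on the conservative rework above: deriving the next
 * request from policy->cur on every sample, instead of from the cached
 * dbs_info->requested_freq, is what makes the transition notifier being
 * removed here unnecessary -- there is no cached value left to
 * resynchronize when the frequency changes behind the governor's back.
 * Schematically, with illustrative numbers:
 *
 *	requested_freq = policy->cur;                   e.g. 1200000 kHz
 *	requested_freq += get_freq_target(cs_tuners, policy);  +100000 kHz
 *	__cpufreq_driver_target(policy, requested_freq,
 *				CPUFREQ_RELATION_H);
 */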
cpufreq_register_notifier(&cs_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - return 0; } -static void cs_exit(struct dbs_data *dbs_data, bool notify) +static void cs_exit(struct dbs_data *dbs_data) { - if (notify) - cpufreq_unregister_notifier(&cs_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - kfree(dbs_data->tuners); } @@ -309,16 +287,10 @@ static void cs_start(struct cpufreq_policy *policy) struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); dbs_info->down_skip = 0; - dbs_info->requested_freq = policy->cur; } -static struct dbs_governor cs_dbs_gov = { - .gov = { - .name = "conservative", - .governor = cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, - }, +static struct dbs_governor cs_governor = { + .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), .kobj_type = { .default_attrs = cs_attributes }, .gov_dbs_timer = cs_dbs_timer, .alloc = cs_alloc, @@ -328,33 +300,7 @@ static struct dbs_governor cs_dbs_gov = { .start = cs_start, }; -#define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov) - -static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); - struct cs_policy_dbs_info *dbs_info; - - if (!policy) - return 0; - - /* policy isn't governed by conservative governor */ - if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE) - return 0; - - dbs_info = to_dbs_info(policy->governor_data); - /* - * we only care if our internally tracked freq moves outside the 'valid' - * ranges of frequency available to us otherwise we do not change it - */ - if (dbs_info->requested_freq > policy->max - || dbs_info->requested_freq < policy->min) - dbs_info->requested_freq = freq->new; - - return 0; -} +#define CPU_FREQ_GOV_CONSERVATIVE (&cs_governor.gov) static int __init cpufreq_gov_dbs_init(void) { diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index be498d56dd69..e415349ab31b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -336,17 +336,6 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) synchronize_sched(); } -static void gov_cancel_work(struct cpufreq_policy *policy) -{ - struct policy_dbs_info *policy_dbs = policy->governor_data; - - gov_clear_update_util(policy_dbs->policy); - irq_work_sync(&policy_dbs->irq_work); - cancel_work_sync(&policy_dbs->work); - atomic_set(&policy_dbs->work_count, 0); - policy_dbs->work_in_progress = false; -} - static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, struct dbs_governor *gov) { @@ -389,7 +378,7 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, gov->free(policy_dbs); } -static int cpufreq_governor_init(struct cpufreq_policy *policy) +int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct dbs_data *dbs_data; @@ -429,7 +418,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list); - ret = gov->init(dbs_data, !policy->governor->initialized); + ret = gov->init(dbs_data); if (ret) goto free_policy_dbs_info; @@ -458,13 +447,13 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) goto out; /* Failure, so roll back. 
*/ - pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret); + pr_err("initialization failed (dbs_data kobject init error %d)\n", ret); policy->governor_data = NULL; if (!have_governor_per_policy()) gov->gdbs_data = NULL; - gov->exit(dbs_data, !policy->governor->initialized); + gov->exit(dbs_data); kfree(dbs_data); free_policy_dbs_info: @@ -474,8 +463,9 @@ out: mutex_unlock(&gov_dbs_data_mutex); return ret; } +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_init); -static int cpufreq_governor_exit(struct cpufreq_policy *policy) +void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; @@ -493,17 +483,17 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = NULL; - gov->exit(dbs_data, policy->governor->initialized == 1); + gov->exit(dbs_data); kfree(dbs_data); } free_policy_dbs_info(policy_dbs, gov); mutex_unlock(&gov_dbs_data_mutex); - return 0; } +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_exit); -static int cpufreq_governor_start(struct cpufreq_policy *policy) +int cpufreq_dbs_governor_start(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; @@ -539,47 +529,28 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) gov_set_update_util(policy_dbs, sampling_rate); return 0; } +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_start); -static int cpufreq_governor_stop(struct cpufreq_policy *policy) +void cpufreq_dbs_governor_stop(struct cpufreq_policy *policy) { - gov_cancel_work(policy); - return 0; + struct policy_dbs_info *policy_dbs = policy->governor_data; + + gov_clear_update_util(policy_dbs->policy); + irq_work_sync(&policy_dbs->irq_work); + cancel_work_sync(&policy_dbs->work); + atomic_set(&policy_dbs->work_count, 0); + policy_dbs->work_in_progress = false; } +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop); -static int cpufreq_governor_limits(struct cpufreq_policy *policy) +void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; mutex_lock(&policy_dbs->timer_mutex); - - if (policy->max < policy->cur) - __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); - else if (policy->min > policy->cur) - __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); - + cpufreq_policy_apply_limits(policy); gov_update_sample_delay(policy_dbs, 0); mutex_unlock(&policy_dbs->timer_mutex); - - return 0; -} - -int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) -{ - if (event == CPUFREQ_GOV_POLICY_INIT) { - return cpufreq_governor_init(policy); - } else if (policy->governor_data) { - switch (event) { - case CPUFREQ_GOV_POLICY_EXIT: - return cpufreq_governor_exit(policy); - case CPUFREQ_GOV_START: - return cpufreq_governor_start(policy); - case CPUFREQ_GOV_STOP: - return cpufreq_governor_stop(policy); - case CPUFREQ_GOV_LIMITS: - return cpufreq_governor_limits(policy); - } - } - return -EINVAL; } -EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_limits); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 34eb214b6d57..ef1037e9c92b 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -138,8 +138,8 @@ struct dbs_governor { unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); struct 
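/* (editorial note: the "notify" flag dropped from init/exit below
 * existed only so the conservative governor could register its
 * transition notifier exactly once across policies; with that notifier
 * gone, the flag and governor->initialized go with it) */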
policy_dbs_info *(*alloc)(void); void (*free)(struct policy_dbs_info *policy_dbs); - int (*init)(struct dbs_data *dbs_data, bool notify); - void (*exit)(struct dbs_data *dbs_data, bool notify); + int (*init)(struct dbs_data *dbs_data); + void (*exit)(struct dbs_data *dbs_data); void (*start)(struct cpufreq_policy *policy); }; @@ -148,6 +148,25 @@ static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy return container_of(policy->governor, struct dbs_governor, gov); } +/* Governor callback routines */ +int cpufreq_dbs_governor_init(struct cpufreq_policy *policy); +void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy); +int cpufreq_dbs_governor_start(struct cpufreq_policy *policy); +void cpufreq_dbs_governor_stop(struct cpufreq_policy *policy); +void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy); + +#define CPUFREQ_DBS_GOVERNOR_INITIALIZER(_name_) \ + { \ + .name = _name_, \ + .max_transition_latency = TRANSITION_LATENCY_LIMIT, \ + .owner = THIS_MODULE, \ + .init = cpufreq_dbs_governor_init, \ + .exit = cpufreq_dbs_governor_exit, \ + .start = cpufreq_dbs_governor_start, \ + .stop = cpufreq_dbs_governor_stop, \ + .limits = cpufreq_dbs_governor_limits, \ + } + /* Governor specific operations */ struct od_ops { unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, @@ -155,7 +174,6 @@ struct od_ops { }; unsigned int dbs_update(struct cpufreq_policy *policy); -int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 300163430516..3a1f49f5f4c6 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -65,34 +65,30 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, { unsigned int freq_req, freq_reduc, freq_avg; unsigned int freq_hi, freq_lo; - unsigned int index = 0; + unsigned int index; unsigned int delay_hi_us; struct policy_dbs_info *policy_dbs = policy->governor_data; struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; + struct cpufreq_frequency_table *freq_table = policy->freq_table; - if (!dbs_info->freq_table) { + if (!freq_table) { dbs_info->freq_lo = 0; dbs_info->freq_lo_delay_us = 0; return freq_next; } - cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, - relation, &index); - freq_req = dbs_info->freq_table[index].frequency; + index = cpufreq_frequency_table_target(policy, freq_next, relation); + freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; /* Find freq bounds for freq_avg in freq_table */ - index = 0; - cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, - CPUFREQ_RELATION_H, &index); - freq_lo = dbs_info->freq_table[index].frequency; - index = 0; - cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, - CPUFREQ_RELATION_L, &index); - freq_hi = dbs_info->freq_table[index].frequency; + index = cpufreq_table_find_index_h(policy, freq_avg); + freq_lo = freq_table[index].frequency; + index = cpufreq_table_find_index_l(policy, freq_avg); + freq_hi = freq_table[index].frequency; /* Find out how long we have to be in hi and lo freqs */ if (freq_hi == freq_lo) { @@ 
-113,7 +109,6 @@ static void ondemand_powersave_bias_init(struct cpufreq_policy *policy) { struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); - dbs_info->freq_table = cpufreq_frequency_get_table(policy->cpu); dbs_info->freq_lo = 0; } @@ -361,17 +356,15 @@ static void od_free(struct policy_dbs_info *policy_dbs) kfree(to_dbs_info(policy_dbs)); } -static int od_init(struct dbs_data *dbs_data, bool notify) +static int od_init(struct dbs_data *dbs_data) { struct od_dbs_tuners *tuners; u64 idle_time; int cpu; tuners = kzalloc(sizeof(*tuners), GFP_KERNEL); - if (!tuners) { - pr_err("%s: kzalloc failed\n", __func__); + if (!tuners) return -ENOMEM; - } cpu = get_cpu(); idle_time = get_cpu_idle_time_us(cpu, NULL); @@ -402,7 +395,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify) return 0; } -static void od_exit(struct dbs_data *dbs_data, bool notify) +static void od_exit(struct dbs_data *dbs_data) { kfree(dbs_data->tuners); } @@ -420,12 +413,7 @@ static struct od_ops od_ops = { }; static struct dbs_governor od_dbs_gov = { - .gov = { - .name = "ondemand", - .governor = cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, - }, + .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"), .kobj_type = { .default_attrs = od_attributes }, .gov_dbs_timer = od_dbs_timer, .alloc = od_alloc, diff --git a/drivers/cpufreq/cpufreq_ondemand.h b/drivers/cpufreq/cpufreq_ondemand.h index f0121db3cd9e..640ea4e97106 100644 --- a/drivers/cpufreq/cpufreq_ondemand.h +++ b/drivers/cpufreq/cpufreq_ondemand.h @@ -13,7 +13,6 @@ struct od_policy_dbs_info { struct policy_dbs_info policy_dbs; - struct cpufreq_frequency_table *freq_table; unsigned int freq_lo; unsigned int freq_lo_delay_us; unsigned int freq_hi_delay_us; diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index af9f4b96f5a8..dafb679adc58 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -16,27 +16,16 @@ #include #include -static int cpufreq_governor_performance(struct cpufreq_policy *policy, - unsigned int event) +static void cpufreq_gov_performance_limits(struct cpufreq_policy *policy) { - switch (event) { - case CPUFREQ_GOV_START: - case CPUFREQ_GOV_LIMITS: - pr_debug("setting to %u kHz because of event %u\n", - policy->max, event); - __cpufreq_driver_target(policy, policy->max, - CPUFREQ_RELATION_H); - break; - default: - break; - } - return 0; + pr_debug("setting to %u kHz\n", policy->max); + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); } static struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", - .governor = cpufreq_governor_performance, .owner = THIS_MODULE, + .limits = cpufreq_gov_performance_limits, }; static int __init cpufreq_gov_performance_init(void) diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index b8b400232a74..78a651038faf 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -16,26 +16,15 @@ #include #include -static int cpufreq_governor_powersave(struct cpufreq_policy *policy, - unsigned int event) +static void cpufreq_gov_powersave_limits(struct cpufreq_policy *policy) { - switch (event) { - case CPUFREQ_GOV_START: - case CPUFREQ_GOV_LIMITS: - pr_debug("setting to %u kHz because of event %u\n", - policy->min, event); - __cpufreq_driver_target(policy, policy->min, - CPUFREQ_RELATION_L); - break; - default: - break; - } - return 0; + pr_debug("setting to %u 
kHz\n", policy->min); + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); } static struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", - .governor = cpufreq_governor_powersave, + .limits = cpufreq_gov_powersave_limits, .owner = THIS_MODULE, }; diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 5e370a30a964..06d3abdffd3a 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -15,7 +15,7 @@ #include #include -static spinlock_t cpufreq_stats_lock; +static DEFINE_SPINLOCK(cpufreq_stats_lock); struct cpufreq_stats { unsigned int total_trans; @@ -52,6 +52,9 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) ssize_t len = 0; int i; + if (policy->fast_switch_enabled) + return 0; + cpufreq_stats_update(stats); for (i = 0; i < stats->state_num; i++) { len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i], @@ -68,6 +71,9 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) ssize_t len = 0; int i, j; + if (policy->fast_switch_enabled) + return 0; + len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); len += snprintf(buf + len, PAGE_SIZE - len, " : "); for (i = 0; i < stats->state_num; i++) { @@ -130,7 +136,7 @@ static int freq_table_get_index(struct cpufreq_stats *stats, unsigned int freq) return -1; } -static void __cpufreq_stats_free_table(struct cpufreq_policy *policy) +void cpufreq_stats_free_table(struct cpufreq_policy *policy) { struct cpufreq_stats *stats = policy->stats; @@ -146,39 +152,25 @@ static void __cpufreq_stats_free_table(struct cpufreq_policy *policy) policy->stats = NULL; } -static void cpufreq_stats_free_table(unsigned int cpu) -{ - struct cpufreq_policy *policy; - - policy = cpufreq_cpu_get(cpu); - if (!policy) - return; - - __cpufreq_stats_free_table(policy); - - cpufreq_cpu_put(policy); -} - -static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) +void cpufreq_stats_create_table(struct cpufreq_policy *policy) { unsigned int i = 0, count = 0, ret = -ENOMEM; struct cpufreq_stats *stats; unsigned int alloc_size; - unsigned int cpu = policy->cpu; struct cpufreq_frequency_table *pos, *table; /* We need cpufreq table for creating stats table */ - table = cpufreq_frequency_get_table(cpu); + table = policy->freq_table; if (unlikely(!table)) - return 0; + return; /* stats already initialized */ if (policy->stats) - return -EEXIST; + return; stats = kzalloc(sizeof(*stats), GFP_KERNEL); if (!stats) - return -ENOMEM; + return; /* Find total allocation size */ cpufreq_for_each_valid_entry(pos, table) @@ -215,80 +207,32 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) policy->stats = stats; ret = sysfs_create_group(&policy->kobj, &stats_attr_group); if (!ret) - return 0; + return; /* We failed, release resources */ policy->stats = NULL; kfree(stats->time_in_state); free_stat: kfree(stats); - - return ret; -} - -static void cpufreq_stats_create_table(unsigned int cpu) -{ - struct cpufreq_policy *policy; - - /* - * "likely(!policy)" because normally cpufreq_stats will be registered - * before cpufreq driver - */ - policy = cpufreq_cpu_get(cpu); - if (likely(!policy)) - return; - - __cpufreq_stats_create_table(policy); - - cpufreq_cpu_put(policy); } -static int cpufreq_stat_notifier_policy(struct notifier_block *nb, - unsigned long val, void *data) +void cpufreq_stats_record_transition(struct cpufreq_policy *policy, + unsigned int new_freq) { - int ret = 0; - struct cpufreq_policy *policy = 
data; - - if (val == CPUFREQ_CREATE_POLICY) - ret = __cpufreq_stats_create_table(policy); - else if (val == CPUFREQ_REMOVE_POLICY) - __cpufreq_stats_free_table(policy); - - return ret; -} - -static int cpufreq_stat_notifier_trans(struct notifier_block *nb, - unsigned long val, void *data) -{ - struct cpufreq_freqs *freq = data; - struct cpufreq_policy *policy = cpufreq_cpu_get(freq->cpu); - struct cpufreq_stats *stats; + struct cpufreq_stats *stats = policy->stats; int old_index, new_index; - if (!policy) { - pr_err("%s: No policy found\n", __func__); - return 0; - } - - if (val != CPUFREQ_POSTCHANGE) - goto put_policy; - - if (!policy->stats) { + if (!stats) { pr_debug("%s: No stats found\n", __func__); - goto put_policy; + return; } - stats = policy->stats; - old_index = stats->last_index; - new_index = freq_table_get_index(stats, freq->new); + new_index = freq_table_get_index(stats, new_freq); /* We can't do stats->time_in_state[-1]= .. */ - if (old_index == -1 || new_index == -1) - goto put_policy; - - if (old_index == new_index) - goto put_policy; + if (old_index == -1 || new_index == -1 || old_index == new_index) + return; cpufreq_stats_update(stats); @@ -297,61 +241,4 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb, stats->trans_table[old_index * stats->max_state + new_index]++; #endif stats->total_trans++; - -put_policy: - cpufreq_cpu_put(policy); - return 0; } - -static struct notifier_block notifier_policy_block = { - .notifier_call = cpufreq_stat_notifier_policy -}; - -static struct notifier_block notifier_trans_block = { - .notifier_call = cpufreq_stat_notifier_trans -}; - -static int __init cpufreq_stats_init(void) -{ - int ret; - unsigned int cpu; - - spin_lock_init(&cpufreq_stats_lock); - ret = cpufreq_register_notifier(¬ifier_policy_block, - CPUFREQ_POLICY_NOTIFIER); - if (ret) - return ret; - - for_each_online_cpu(cpu) - cpufreq_stats_create_table(cpu); - - ret = cpufreq_register_notifier(¬ifier_trans_block, - CPUFREQ_TRANSITION_NOTIFIER); - if (ret) { - cpufreq_unregister_notifier(¬ifier_policy_block, - CPUFREQ_POLICY_NOTIFIER); - for_each_online_cpu(cpu) - cpufreq_stats_free_table(cpu); - return ret; - } - - return 0; -} -static void __exit cpufreq_stats_exit(void) -{ - unsigned int cpu; - - cpufreq_unregister_notifier(¬ifier_policy_block, - CPUFREQ_POLICY_NOTIFIER); - cpufreq_unregister_notifier(¬ifier_trans_block, - CPUFREQ_TRANSITION_NOTIFIER); - for_each_online_cpu(cpu) - cpufreq_stats_free_table(cpu); -} - -MODULE_AUTHOR("Zou Nan hai "); -MODULE_DESCRIPTION("Export cpufreq stats via sysfs"); -MODULE_LICENSE("GPL"); - -module_init(cpufreq_stats_init); -module_exit(cpufreq_stats_exit); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 9f3dec9a3f36..bd897e3e134d 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -65,66 +65,66 @@ static int cpufreq_userspace_policy_init(struct cpufreq_policy *policy) return 0; } -static int cpufreq_governor_userspace(struct cpufreq_policy *policy, - unsigned int event) +static void cpufreq_userspace_policy_exit(struct cpufreq_policy *policy) +{ + mutex_lock(&userspace_mutex); + kfree(policy->governor_data); + policy->governor_data = NULL; + mutex_unlock(&userspace_mutex); +} + +static int cpufreq_userspace_policy_start(struct cpufreq_policy *policy) { unsigned int *setspeed = policy->governor_data; - unsigned int cpu = policy->cpu; - int rc = 0; - if (event == CPUFREQ_GOV_POLICY_INIT) - return cpufreq_userspace_policy_init(policy); 
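/*
 * Editorial sketch for the cpufreq-stats rework above: instead of
 * registering policy and transition notifiers, the stats code is now
 * driven by direct calls from the cpufreq core.  The real call sites
 * live in cpufreq.c and are not part of this hunk; schematically, the
 * core's transition path ends up doing the equivalent of this
 * hypothetical wrapper:
 */
static void example_transition_end(struct cpufreq_policy *policy,
				   unsigned int new_freq)
{
	cpufreq_stats_record_transition(policy, new_freq);
}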
+ BUG_ON(!policy->cur); + pr_debug("started managing cpu %u\n", policy->cpu); - if (!setspeed) - return -EINVAL; - - switch (event) { - case CPUFREQ_GOV_POLICY_EXIT: - mutex_lock(&userspace_mutex); - policy->governor_data = NULL; - kfree(setspeed); - mutex_unlock(&userspace_mutex); - break; - case CPUFREQ_GOV_START: - BUG_ON(!policy->cur); - pr_debug("started managing cpu %u\n", cpu); - - mutex_lock(&userspace_mutex); - per_cpu(cpu_is_managed, cpu) = 1; - *setspeed = policy->cur; - mutex_unlock(&userspace_mutex); - break; - case CPUFREQ_GOV_STOP: - pr_debug("managing cpu %u stopped\n", cpu); - - mutex_lock(&userspace_mutex); - per_cpu(cpu_is_managed, cpu) = 0; - *setspeed = 0; - mutex_unlock(&userspace_mutex); - break; - case CPUFREQ_GOV_LIMITS: - mutex_lock(&userspace_mutex); - pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n", - cpu, policy->min, policy->max, policy->cur, *setspeed); - - if (policy->max < *setspeed) - __cpufreq_driver_target(policy, policy->max, - CPUFREQ_RELATION_H); - else if (policy->min > *setspeed) - __cpufreq_driver_target(policy, policy->min, - CPUFREQ_RELATION_L); - else - __cpufreq_driver_target(policy, *setspeed, - CPUFREQ_RELATION_L); - mutex_unlock(&userspace_mutex); - break; - } - return rc; + mutex_lock(&userspace_mutex); + per_cpu(cpu_is_managed, policy->cpu) = 1; + *setspeed = policy->cur; + mutex_unlock(&userspace_mutex); + return 0; +} + +static void cpufreq_userspace_policy_stop(struct cpufreq_policy *policy) +{ + unsigned int *setspeed = policy->governor_data; + + pr_debug("managing cpu %u stopped\n", policy->cpu); + + mutex_lock(&userspace_mutex); + per_cpu(cpu_is_managed, policy->cpu) = 0; + *setspeed = 0; + mutex_unlock(&userspace_mutex); +} + +static void cpufreq_userspace_policy_limits(struct cpufreq_policy *policy) +{ + unsigned int *setspeed = policy->governor_data; + + mutex_lock(&userspace_mutex); + + pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n", + policy->cpu, policy->min, policy->max, policy->cur, *setspeed); + + if (policy->max < *setspeed) + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); + else if (policy->min > *setspeed) + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); + else + __cpufreq_driver_target(policy, *setspeed, CPUFREQ_RELATION_L); + + mutex_unlock(&userspace_mutex); } static struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", - .governor = cpufreq_governor_userspace, + .init = cpufreq_userspace_policy_init, + .exit = cpufreq_userspace_policy_exit, + .start = cpufreq_userspace_policy_start, + .stop = cpufreq_userspace_policy_stop, + .limits = cpufreq_userspace_policy_limits, .store_setspeed = cpufreq_set, .show_setspeed = show_speed, .owner = THIS_MODULE, diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c index 7e336d20c184..b95a872800ec 100644 --- a/drivers/cpufreq/davinci-cpufreq.c +++ b/drivers/cpufreq/davinci-cpufreq.c @@ -38,26 +38,6 @@ struct davinci_cpufreq { }; static struct davinci_cpufreq cpufreq; -static int davinci_verify_speed(struct cpufreq_policy *policy) -{ - struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data; - struct cpufreq_frequency_table *freq_table = pdata->freq_table; - struct clk *armclk = cpufreq.armclk; - - if (freq_table) - return cpufreq_frequency_table_verify(policy, freq_table); - - if (policy->cpu) - return -EINVAL; - - cpufreq_verify_within_cpu_limits(policy); - policy->min = clk_round_rate(armclk, policy->min * 
1000) / 1000; - policy->max = clk_round_rate(armclk, policy->max * 1000) / 1000; - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); - return 0; -} - static int davinci_target(struct cpufreq_policy *policy, unsigned int idx) { struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data; @@ -121,7 +101,7 @@ static int davinci_cpu_init(struct cpufreq_policy *policy) static struct cpufreq_driver davinci_driver = { .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, - .verify = davinci_verify_speed, + .verify = cpufreq_generic_frequency_table_verify, .target_index = davinci_target, .get = cpufreq_generic_get, .init = davinci_cpu_init, diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index a8f1daffc9bc..3bbbf9e6960c 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -63,8 +63,6 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, else return 0; } -EXPORT_SYMBOL_GPL(cpufreq_frequency_table_cpuinfo); - int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table) @@ -108,20 +106,16 @@ EXPORT_SYMBOL_GPL(cpufreq_frequency_table_verify); */ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy) { - struct cpufreq_frequency_table *table = - cpufreq_frequency_get_table(policy->cpu); - if (!table) + if (!policy->freq_table) return -ENODEV; - return cpufreq_frequency_table_verify(policy, table); + return cpufreq_frequency_table_verify(policy, policy->freq_table); } EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); -int cpufreq_frequency_table_target(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table, - unsigned int target_freq, - unsigned int relation, - unsigned int *index) +int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) { struct cpufreq_frequency_table optimal = { .driver_data = ~0, @@ -132,7 +126,9 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, .frequency = 0, }; struct cpufreq_frequency_table *pos; + struct cpufreq_frequency_table *table = policy->freq_table; unsigned int freq, diff, i = 0; + int index; pr_debug("request for target %u kHz (relation: %u) for cpu %u\n", target_freq, relation, policy->cpu); @@ -196,25 +192,26 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, } } if (optimal.driver_data > i) { - if (suboptimal.driver_data > i) - return -EINVAL; - *index = suboptimal.driver_data; - } else - *index = optimal.driver_data; + if (suboptimal.driver_data > i) { + WARN(1, "Invalid frequency table: %d\n", policy->cpu); + return 0; + } - pr_debug("target index is %u, freq is:%u kHz\n", *index, - table[*index].frequency); + index = suboptimal.driver_data; + } else + index = optimal.driver_data; - return 0; + pr_debug("target index is %u, freq is:%u kHz\n", index, + table[index].frequency); + return index; } -EXPORT_SYMBOL_GPL(cpufreq_frequency_table_target); +EXPORT_SYMBOL_GPL(cpufreq_table_index_unsorted); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq) { - struct cpufreq_frequency_table *pos, *table; + struct cpufreq_frequency_table *pos, *table = policy->freq_table; - table = cpufreq_frequency_get_table(policy->cpu); if (unlikely(!table)) { pr_debug("%s: Unable to find frequency table\n", __func__); return -ENOENT; @@ -300,15 +297,72 @@ struct freq_attr *cpufreq_generic_attr[] = { }; EXPORT_SYMBOL_GPL(cpufreq_generic_attr); +static 
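/* (editorial note: cpufreq_table_index_unsorted() above remains the
 * fallback lookup; recording the sort order below is what allows the
 * cpufreq_table_find_index_{l,h,c} helpers used elsewhere in this
 * series to walk sorted tables directly instead of scanning every
 * entry) */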
int set_freq_table_sorted(struct cpufreq_policy *policy) +{ + struct cpufreq_frequency_table *pos, *table = policy->freq_table; + struct cpufreq_frequency_table *prev = NULL; + int ascending = 0; + + policy->freq_table_sorted = CPUFREQ_TABLE_UNSORTED; + + cpufreq_for_each_valid_entry(pos, table) { + if (!prev) { + prev = pos; + continue; + } + + if (pos->frequency == prev->frequency) { + pr_warn("Duplicate freq-table entries: %u\n", + pos->frequency); + return -EINVAL; + } + + /* Frequency increased from prev to pos */ + if (pos->frequency > prev->frequency) { + /* But frequency was decreasing earlier */ + if (ascending < 0) { + pr_debug("Freq table is unsorted\n"); + return 0; + } + + ascending++; + } else { + /* Frequency decreased from prev to pos */ + + /* But frequency was increasing earlier */ + if (ascending > 0) { + pr_debug("Freq table is unsorted\n"); + return 0; + } + + ascending--; + } + + prev = pos; + } + + if (ascending > 0) + policy->freq_table_sorted = CPUFREQ_TABLE_SORTED_ASCENDING; + else + policy->freq_table_sorted = CPUFREQ_TABLE_SORTED_DESCENDING; + + pr_debug("Freq table is sorted in %s order\n", + ascending > 0 ? "ascending" : "descending"); + + return 0; +} + int cpufreq_table_validate_and_show(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table) { - int ret = cpufreq_frequency_table_cpuinfo(policy, table); + int ret; - if (!ret) - policy->freq_table = table; + ret = cpufreq_frequency_table_cpuinfo(policy, table); + if (ret) + return ret; - return ret; + policy->freq_table = table; + return set_freq_table_sorted(policy); } EXPORT_SYMBOL_GPL(cpufreq_table_validate_and_show); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index fe9dc17ea873..9ec033b4f2d9 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -35,6 +35,7 @@ #include #include #include +#include #define ATOM_RATIOS 0x66a #define ATOM_VIDS 0x66b @@ -96,7 +97,6 @@ static inline u64 div_ext_fp(u64 x, u64 y) * read from MPERF MSR between last and current sample * @tsc: Difference of time stamp counter between last and * current sample - * @freq: Effective frequency calculated from APERF/MPERF * @time: Current time from scheduler * * This structure is used in the cpudata structure to store performance sample @@ -108,7 +108,6 @@ struct sample { u64 aperf; u64 mperf; u64 tsc; - int freq; u64 time; }; @@ -281,9 +280,9 @@ struct cpu_defaults { static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu); static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu); -static struct pstate_adjust_policy pid_params; -static struct pstate_funcs pstate_funcs; -static int hwp_active; +static struct pstate_adjust_policy pid_params __read_mostly; +static struct pstate_funcs pstate_funcs __read_mostly; +static int hwp_active __read_mostly; #ifdef CONFIG_ACPI static bool acpi_ppc; @@ -807,7 +806,8 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(struct cpudata *cpudata) { /* First disable HWP notification interrupt as we don't process them */ - wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); + if (static_cpu_has(X86_FEATURE_HWP_NOTIFY)) + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } @@ -944,7 +944,7 @@ static int core_get_max_pstate(void) if (err) goto skip_tar; - tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl; + tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3); err = rdmsrl_safe(tdp_msr, &tdp_ratio); if 
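/* (editorial note: MSR_CONFIG_TDP_CONTROL keeps the active TDP level
 * in its low two bits, with unrelated bits such as the lock bit above
 * them, so masking with 0x3 keeps the computed tdp_msr inside the
 * MSR_CONFIG_TDP_NOMINAL..MSR_CONFIG_TDP_LEVEL_2 block) */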
(err) goto skip_tar; @@ -972,7 +972,7 @@ static int core_get_turbo_pstate(void) u64 value; int nont, ret; - rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value); + rdmsrl(MSR_TURBO_RATIO_LIMIT, value); nont = core_get_max_pstate(); ret = (value) & 255; if (ret <= nont) @@ -1001,7 +1001,7 @@ static int knl_get_turbo_pstate(void) u64 value; int nont, ret; - rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value); + rdmsrl(MSR_TURBO_RATIO_LIMIT, value); nont = core_get_max_pstate(); ret = (((value) >> 8) & 0xFF); if (ret <= nont) @@ -1091,6 +1091,26 @@ static struct cpu_defaults knl_params = { }, }; +static struct cpu_defaults bxt_params = { + .pid_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 60, + .p_gain_pct = 14, + .d_gain_pct = 0, + .i_gain_pct = 4, + }, + .funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .get_target_pstate = get_target_pstate_use_cpu_load, + }, +}; + static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) { int max_perf = cpu->pstate.turbo_pstate; @@ -1113,17 +1133,12 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } -static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate) -{ - trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); - cpu->pstate.current_pstate = pstate; -} - static void intel_pstate_set_min_pstate(struct cpudata *cpu) { int pstate = cpu->pstate.min_pstate; - intel_pstate_record_pstate(cpu, pstate); + trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); + cpu->pstate.current_pstate = pstate; /* * Generally, there is no guarantee that this code will always run on * the CPU being updated, so force the register update to run on the @@ -1283,10 +1298,11 @@ static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) intel_pstate_get_min_max(cpu, &min_perf, &max_perf); pstate = clamp_t(int, pstate, min_perf, max_perf); + trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); if (pstate == cpu->pstate.current_pstate) return; - intel_pstate_record_pstate(cpu, pstate); + cpu->pstate.current_pstate = pstate; wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); } @@ -1334,29 +1350,30 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, (unsigned long)&policy } static const struct x86_cpu_id intel_pstate_cpu_ids[] = { - ICPU(0x2a, core_params), - ICPU(0x2d, core_params), - ICPU(0x37, silvermont_params), - ICPU(0x3a, core_params), - ICPU(0x3c, core_params), - ICPU(0x3d, core_params), - ICPU(0x3e, core_params), - ICPU(0x3f, core_params), - ICPU(0x45, core_params), - ICPU(0x46, core_params), - ICPU(0x47, core_params), - ICPU(0x4c, airmont_params), - ICPU(0x4e, core_params), - ICPU(0x4f, core_params), - ICPU(0x5e, core_params), - ICPU(0x56, core_params), - ICPU(0x57, knl_params), + ICPU(INTEL_FAM6_SANDYBRIDGE, core_params), + ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_params), + ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_params), + ICPU(INTEL_FAM6_IVYBRIDGE, core_params), + ICPU(INTEL_FAM6_HASWELL_CORE, core_params), + ICPU(INTEL_FAM6_BROADWELL_CORE, core_params), + ICPU(INTEL_FAM6_IVYBRIDGE_X, core_params), + ICPU(INTEL_FAM6_HASWELL_X, core_params), + ICPU(INTEL_FAM6_HASWELL_ULT, core_params), + ICPU(INTEL_FAM6_HASWELL_GT3E, core_params), + ICPU(INTEL_FAM6_BROADWELL_GT3E, core_params), 
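/* (editorial note: the INTEL_FAM6_* macros are symbolic CPU model
 * numbers, e.g. INTEL_FAM6_BROADWELL_CORE == 0x3d, so each rewritten
 * entry matches the same model as the raw hex id it replaces; the
 * ATOM_GOLDMONT line below is a genuinely new entry wiring up the
 * bxt_params defaults added above) */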
+ ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_params), + ICPU(INTEL_FAM6_SKYLAKE_MOBILE, core_params), + ICPU(INTEL_FAM6_BROADWELL_X, core_params), + ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params), + ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), + ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params), + ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); -static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = { - ICPU(0x56, core_params), +static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { + ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), {} }; @@ -1400,6 +1417,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) { struct cpudata *cpu = all_cpu_data[cpu_num]; + if (cpu->update_util_set) + return; + /* Prevent intel_pstate_update_util() from using stale data. */ cpu->sample.time = 0; cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, @@ -1440,8 +1460,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) if (!policy->cpuinfo.max_freq) return -ENODEV; - intel_pstate_clear_update_util_hook(policy->cpu); - pr_debug("set_policy cpuinfo.max %u policy->max %u\n", policy->cpuinfo.max_freq, policy->max); @@ -1574,12 +1592,12 @@ static struct cpufreq_driver intel_pstate_driver = { .name = "intel_pstate", }; -static int __initdata no_load; -static int __initdata no_hwp; -static int __initdata hwp_only; -static unsigned int force_load; +static int no_load __initdata; +static int no_hwp __initdata; +static int hwp_only __initdata; +static unsigned int force_load __initdata; -static int intel_pstate_msrs_not_valid(void) +static int __init intel_pstate_msrs_not_valid(void) { if (!pstate_funcs.get_max() || !pstate_funcs.get_min() || @@ -1589,7 +1607,7 @@ static int intel_pstate_msrs_not_valid(void) return 0; } -static void copy_pid_params(struct pstate_adjust_policy *policy) +static void __init copy_pid_params(struct pstate_adjust_policy *policy) { pid_params.sample_rate_ms = policy->sample_rate_ms; pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; @@ -1600,7 +1618,7 @@ static void copy_pid_params(struct pstate_adjust_policy *policy) pid_params.setpoint = policy->setpoint; } -static void copy_cpu_funcs(struct pstate_funcs *funcs) +static void __init copy_cpu_funcs(struct pstate_funcs *funcs) { pstate_funcs.get_max = funcs->get_max; pstate_funcs.get_max_physical = funcs->get_max_physical; @@ -1615,7 +1633,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) #ifdef CONFIG_ACPI -static bool intel_pstate_no_acpi_pss(void) +static bool __init intel_pstate_no_acpi_pss(void) { int i; @@ -1644,7 +1662,7 @@ static bool intel_pstate_no_acpi_pss(void) return true; } -static bool intel_pstate_has_acpi_ppc(void) +static bool __init intel_pstate_has_acpi_ppc(void) { int i; @@ -1672,7 +1690,7 @@ struct hw_vendor_info { }; /* Hardware vendor-specific info that has its own power management modes */ -static struct hw_vendor_info vendor_info[] = { +static struct hw_vendor_info vendor_info[] __initdata = { {1, "HP ", "ProLiant", PSS}, {1, "ORACLE", "X4-2 ", PPC}, {1, "ORACLE", "X4-2L ", PPC}, @@ -1691,7 +1709,7 @@ static struct hw_vendor_info vendor_info[] = { {0, "", ""}, }; -static bool intel_pstate_platform_pwr_mgmt_exists(void) +static bool __init intel_pstate_platform_pwr_mgmt_exists(void) { struct acpi_table_header hdr; struct hw_vendor_info *v_info; diff --git a/drivers/cpufreq/mvebu-cpufreq.c b/drivers/cpufreq/mvebu-cpufreq.c index e920889b9ac2..ed915ee85dd9 100644 --- 
a/drivers/cpufreq/mvebu-cpufreq.c +++ b/drivers/cpufreq/mvebu-cpufreq.c @@ -70,7 +70,7 @@ static int __init armada_xp_pmsu_cpufreq_init(void) continue; } - clk = clk_get(cpu_dev, 0); + clk = clk_get(cpu_dev, NULL); if (IS_ERR(clk)) { pr_err("Cannot get clock for CPU %d\n", cpu); return PTR_ERR(clk); } diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 808a320e9d5d..3f0ce2ae35ee 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -487,7 +487,7 @@ static int __init pcc_cpufreq_probe(void) doorbell.space_id = reg_resource->space_id; doorbell.bit_width = reg_resource->bit_width; doorbell.bit_offset = reg_resource->bit_offset; - doorbell.access_width = 64; + doorbell.access_width = 4; doorbell.address = reg_resource->address; pr_debug("probe: doorbell: space_id is %d, bit_width is %d, " @@ -555,8 +555,6 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->min = policy->cpuinfo.min_freq = ioread32(&pcch_hdr->minimum_frequency) * 1000; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - pr_debug("init: policy->max is %d, policy->min is %d\n", policy->max, policy->min); out: diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 54c45368e3f1..87796e0864e9 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -64,12 +64,14 @@ /** * struct global_pstate_info - Per policy data structure to maintain history of * global pstates - * @highest_lpstate: The local pstate from which we are ramping down + * @highest_lpstate_idx: The local pstate index from which we are + * ramping down * @elapsed_time: Time in ms spent in ramping down from - * highest_lpstate + * highest_lpstate_idx * @last_sampled_time: Time from boot in ms when global pstates were * last set - * @last_lpstate,last_gpstate: Last set values for local and global pstates + * @last_lpstate_idx, Last set value of local pstate and global + * last_gpstate_idx pstate in terms of cpufreq table index * @timer: Is used for ramping down if cpu goes idle for * a long time with global pstate held high * @gpstate_lock: A spinlock to maintain synchronization between @@ -77,11 +79,11 @@ * governor's target_index calls */ struct global_pstate_info { - int highest_lpstate; + int highest_lpstate_idx; unsigned int elapsed_time; unsigned int last_sampled_time; - int last_lpstate; - int last_gpstate; + int last_lpstate_idx; + int last_gpstate_idx; spinlock_t gpstate_lock; struct timer_list timer; }; @@ -124,29 +126,47 @@ static int nr_chips; static DEFINE_PER_CPU(struct chip *, chip_info); /* - * Note: The set of pstates consists of contiguous integers, the - * smallest of which is indicated by powernv_pstate_info.min, the - * largest of which is indicated by powernv_pstate_info.max. + * Note: + * The set of pstates consists of contiguous integers. + * powernv_pstate_info stores the index of the frequency table for + * max, min and nominal frequencies. It also stores number of + * available frequencies. * - * The nominal pstate is the highest non-turbo pstate in this - * platform. This is indicated by powernv_pstate_info.nominal. + * powernv_pstate_info.nominal indicates the index to the highest + * non-turbo frequency.
*/ static struct powernv_pstate_info { - int min; - int max; - int nominal; - int nr_pstates; + unsigned int min; + unsigned int max; + unsigned int nominal; + unsigned int nr_pstates; } powernv_pstate_info; +/* Use the following macros for conversions between pstate_id and index */ +static inline int idx_to_pstate(unsigned int i) +{ + return powernv_freqs[i].driver_data; +} + +static inline unsigned int pstate_to_idx(int pstate) +{ + /* + * abs() is deliberately used so that it works with + * both monotonically increasing and decreasing + * pstate values + */ + return abs(pstate - idx_to_pstate(powernv_pstate_info.max)); +} + static inline void reset_gpstates(struct cpufreq_policy *policy) { struct global_pstate_info *gpstates = policy->driver_data; - gpstates->highest_lpstate = 0; + gpstates->highest_lpstate_idx = 0; gpstates->elapsed_time = 0; gpstates->last_sampled_time = 0; - gpstates->last_lpstate = 0; - gpstates->last_gpstate = 0; + gpstates->last_lpstate_idx = 0; + gpstates->last_gpstate_idx = 0; } /* @@ -156,9 +176,10 @@ static inline void reset_gpstates(struct cpufreq_policy *policy) static int init_powernv_pstates(void) { struct device_node *power_mgt; - int i, pstate_min, pstate_max, pstate_nominal, nr_pstates = 0; + int i, nr_pstates = 0; const __be32 *pstate_ids, *pstate_freqs; u32 len_ids, len_freqs; + u32 pstate_min, pstate_max, pstate_nominal; power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); if (!power_mgt) { @@ -208,6 +229,7 @@ static int init_powernv_pstates(void) return -ENODEV; } + powernv_pstate_info.nr_pstates = nr_pstates; pr_debug("NR PStates %d\n", nr_pstates); for (i = 0; i < nr_pstates; i++) { u32 id = be32_to_cpu(pstate_ids[i]); @@ -216,15 +238,17 @@ static int init_powernv_pstates(void) pr_debug("PState id %d freq %d MHz\n", id, freq); powernv_freqs[i].frequency = freq * 1000; /* kHz */ powernv_freqs[i].driver_data = id; + + if (id == pstate_max) + powernv_pstate_info.max = i; + else if (id == pstate_nominal) + powernv_pstate_info.nominal = i; + else if (id == pstate_min) + powernv_pstate_info.min = i; } + /* End of list marker entry */ powernv_freqs[i].frequency = CPUFREQ_TABLE_END; - - powernv_pstate_info.min = pstate_min; - powernv_pstate_info.max = pstate_max; - powernv_pstate_info.nominal = pstate_nominal; - powernv_pstate_info.nr_pstates = nr_pstates; - return 0; } @@ -233,12 +257,12 @@ static unsigned int pstate_id_to_freq(int pstate_id) { int i; - i = powernv_pstate_info.max - pstate_id; + i = pstate_to_idx(pstate_id); if (i >= powernv_pstate_info.nr_pstates || i < 0) { pr_warn("PState id %d outside of PState table, " "reporting nominal id %d instead\n", - pstate_id, powernv_pstate_info.nominal); - i = powernv_pstate_info.max - powernv_pstate_info.nominal; + pstate_id, idx_to_pstate(powernv_pstate_info.nominal)); + i = powernv_pstate_info.nominal; } return powernv_freqs[i].frequency; @@ -252,7 +276,7 @@ static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy, char *buf) { return sprintf(buf, "%u\n", - pstate_id_to_freq(powernv_pstate_info.nominal)); + powernv_freqs[powernv_pstate_info.nominal].frequency); } struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = @@ -426,7 +450,7 @@ static void set_pstate(void *data) */ static inline unsigned int get_nominal_index(void) { - return powernv_pstate_info.max - powernv_pstate_info.nominal; + return powernv_pstate_info.nominal; } static void powernv_cpufreq_throttle_check(void *data) @@ -435,20 +459,22 @@ static void powernv_cpufreq_throttle_check(void *data) unsigned int cpu = 
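/* (editorial worked example for the helpers above: if the firmware
 * reports contiguous pstates +2..-5 with the fastest, +2, stored at
 * table index 0, then idx_to_pstate(0) == 2 and
 * pstate_to_idx(-3) == abs(-3 - 2) == 5; the abs() likewise covers
 * platforms whose pstate ids grow as frequency drops) */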
smp_processor_id(); unsigned long pmsr; int pmsr_pmax; + unsigned int pmsr_pmax_idx; pmsr = get_pmspr(SPRN_PMSR); chip = this_cpu_read(chip_info); /* Check for Pmax Capping */ pmsr_pmax = (s8)PMSR_MAX(pmsr); - if (pmsr_pmax != powernv_pstate_info.max) { + pmsr_pmax_idx = pstate_to_idx(pmsr_pmax); + if (pmsr_pmax_idx != powernv_pstate_info.max) { if (chip->throttled) goto next; chip->throttled = true; - if (pmsr_pmax < powernv_pstate_info.nominal) { - pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", + if (pmsr_pmax_idx > powernv_pstate_info.nominal) { + pr_warn_once("CPU %d on Chip %u has Pmax(%d) reduced below nominal frequency(%d)\n", cpu, chip->id, pmsr_pmax, - powernv_pstate_info.nominal); + idx_to_pstate(powernv_pstate_info.nominal)); chip->throttle_sub_turbo++; } else { chip->throttle_turbo++; @@ -484,34 +510,35 @@ next: /** * calc_global_pstate - Calculate global pstate - * @elapsed_time: Elapsed time in milliseconds - * @local_pstate: New local pstate - * @highest_lpstate: pstate from which it is ramping down + * @elapsed_time: Elapsed time in milliseconds + * @local_pstate_idx: New local pstate + * @highest_lpstate_idx: pstate from which it is ramping down * * Finds the appropriate global pstate based on the pstate from which it is * ramping down and the time elapsed in ramping down. It follows a quadratic * equation which ensures that ramping down to pmin completes in 5 seconds. */ static inline int calc_global_pstate(unsigned int elapsed_time, - int highest_lpstate, int local_pstate) + int highest_lpstate_idx, + int local_pstate_idx) { - int pstate_diff; + int index_diff; /* * Using ramp_down_percent we get the percentage of rampdown * that we are expecting to be dropping. Difference between - * highest_lpstate and powernv_pstate_info.min will give an absolute + * highest_lpstate_idx and powernv_pstate_info.min will give an absolute * number of how many pstates we will drop eventually by the end of * 5 seconds, then just scale it to get the number of pstates to be * dropped (for example, at 25% rampdown with min at index 20 and * highest_lpstate_idx at 4, index_diff = 25 * (20 - 4) / 100 = 4).
*/ - pstate_diff = ((int)ramp_down_percent(elapsed_time) * - (highest_lpstate - powernv_pstate_info.min)) / 100; + index_diff = ((int)ramp_down_percent(elapsed_time) * + (powernv_pstate_info.min - highest_lpstate_idx)) / 100; /* Ensure that global pstate is >= to local pstate */ - if (highest_lpstate - pstate_diff < local_pstate) - return local_pstate; + if (highest_lpstate_idx + index_diff >= local_pstate_idx) + return local_pstate_idx; else - return highest_lpstate - pstate_diff; + return highest_lpstate_idx + index_diff; } static inline void queue_gpstate_timer(struct global_pstate_info *gpstates) @@ -530,8 +557,7 @@ static inline void queue_gpstate_timer(struct global_pstate_info *gpstates) else timer_interval = GPSTATE_TIMER_INTERVAL; - mod_timer_pinned(&gpstates->timer, jiffies + - msecs_to_jiffies(timer_interval)); + mod_timer(&gpstates->timer, jiffies + msecs_to_jiffies(timer_interval)); } /** @@ -547,7 +573,7 @@ void gpstate_timer_handler(unsigned long data) { struct cpufreq_policy *policy = (struct cpufreq_policy *)data; struct global_pstate_info *gpstates = policy->driver_data; - int gpstate_id; + int gpstate_idx; unsigned int time_diff = jiffies_to_msecs(jiffies) - gpstates->last_sampled_time; struct powernv_smp_call_data freq_data; @@ -557,29 +583,29 @@ void gpstate_timer_handler(unsigned long data) gpstates->last_sampled_time += time_diff; gpstates->elapsed_time += time_diff; - freq_data.pstate_id = gpstates->last_lpstate; + freq_data.pstate_id = idx_to_pstate(gpstates->last_lpstate_idx); - if ((gpstates->last_gpstate == freq_data.pstate_id) || + if ((gpstates->last_gpstate_idx == gpstates->last_lpstate_idx) || (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) { - gpstate_id = freq_data.pstate_id; + gpstate_idx = pstate_to_idx(freq_data.pstate_id); reset_gpstates(policy); - gpstates->highest_lpstate = freq_data.pstate_id; + gpstates->highest_lpstate_idx = gpstate_idx; } else { - gpstate_id = calc_global_pstate(gpstates->elapsed_time, - gpstates->highest_lpstate, - freq_data.pstate_id); + gpstate_idx = calc_global_pstate(gpstates->elapsed_time, + gpstates->highest_lpstate_idx, + freq_data.pstate_id); } /* * If local pstate is equal to global pstate, rampdown is over * So timer is not required to be queued. 
*/ - if (gpstate_id != freq_data.pstate_id) + if (gpstate_idx != gpstates->last_lpstate_idx) queue_gpstate_timer(gpstates); - freq_data.gpstate_id = gpstate_id; - gpstates->last_gpstate = freq_data.gpstate_id; - gpstates->last_lpstate = freq_data.pstate_id; + freq_data.gpstate_id = idx_to_pstate(gpstate_idx); + gpstates->last_gpstate_idx = pstate_to_idx(freq_data.gpstate_id); + gpstates->last_lpstate_idx = pstate_to_idx(freq_data.pstate_id); spin_unlock(&gpstates->gpstate_lock); @@ -596,7 +622,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, unsigned int new_index) { struct powernv_smp_call_data freq_data; - unsigned int cur_msec, gpstate_id; + unsigned int cur_msec, gpstate_idx; struct global_pstate_info *gpstates = policy->driver_data; if (unlikely(rebooting) && new_index != get_nominal_index()) @@ -608,15 +634,15 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, cur_msec = jiffies_to_msecs(get_jiffies_64()); spin_lock(&gpstates->gpstate_lock); - freq_data.pstate_id = powernv_freqs[new_index].driver_data; + freq_data.pstate_id = idx_to_pstate(new_index); if (!gpstates->last_sampled_time) { - gpstate_id = freq_data.pstate_id; - gpstates->highest_lpstate = freq_data.pstate_id; + gpstate_idx = new_index; + gpstates->highest_lpstate_idx = new_index; goto gpstates_done; } - if (gpstates->last_gpstate > freq_data.pstate_id) { + if (gpstates->last_gpstate_idx < new_index) { gpstates->elapsed_time += cur_msec - gpstates->last_sampled_time; @@ -627,34 +653,34 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, */ if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) { reset_gpstates(policy); - gpstates->highest_lpstate = freq_data.pstate_id; - gpstate_id = freq_data.pstate_id; + gpstates->highest_lpstate_idx = new_index; + gpstate_idx = new_index; } else { /* Elapsed_time is less than 5 seconds, continue to rampdown */ - gpstate_id = calc_global_pstate(gpstates->elapsed_time, - gpstates->highest_lpstate, - freq_data.pstate_id); + gpstate_idx = calc_global_pstate(gpstates->elapsed_time, + gpstates->highest_lpstate_idx, + new_index); } } else { reset_gpstates(policy); - gpstates->highest_lpstate = freq_data.pstate_id; - gpstate_id = freq_data.pstate_id; + gpstates->highest_lpstate_idx = new_index; + gpstate_idx = new_index; } /* * If local pstate is equal to global pstate, rampdown is over * So timer is not required to be queued. 
*/ - if (gpstate_id != freq_data.pstate_id) + if (gpstate_idx != new_index) queue_gpstate_timer(gpstates); else del_timer_sync(&gpstates->timer); gpstates_done: - freq_data.gpstate_id = gpstate_id; + freq_data.gpstate_id = idx_to_pstate(gpstate_idx); gpstates->last_sampled_time = cur_msec; - gpstates->last_gpstate = freq_data.gpstate_id; - gpstates->last_lpstate = freq_data.pstate_id; + gpstates->last_gpstate_idx = gpstate_idx; + gpstates->last_lpstate_idx = new_index; spin_unlock(&gpstates->gpstate_lock); @@ -699,7 +725,7 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->driver_data = gpstates; /* initialize timer */ - init_timer_deferrable(&gpstates->timer); + init_timer_pinned_deferrable(&gpstates->timer); gpstates->timer.data = (unsigned long)policy; gpstates->timer.function = gpstate_timer_handler; gpstates->timer.expires = jiffies + @@ -760,9 +786,7 @@ void powernv_cpufreq_work_fn(struct work_struct *work) struct cpufreq_policy policy; cpufreq_get_policy(&policy, cpu); - cpufreq_frequency_table_target(&policy, policy.freq_table, - policy.cur, - CPUFREQ_RELATION_C, &index); + index = cpufreq_table_find_index_c(&policy, policy.cur); powernv_cpufreq_target_index(&policy, index); cpumask_andnot(&mask, &mask, policy.cpus); } @@ -848,8 +872,8 @@ static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy) struct powernv_smp_call_data freq_data; struct global_pstate_info *gpstates = policy->driver_data; - freq_data.pstate_id = powernv_pstate_info.min; - freq_data.gpstate_id = powernv_pstate_info.min; + freq_data.pstate_id = idx_to_pstate(powernv_pstate_info.min); + freq_data.gpstate_id = idx_to_pstate(powernv_pstate_info.min); smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1); del_timer_sync(&gpstates->timer); } diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index 7c4cd5c634f2..dc112481a408 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -94,7 +94,7 @@ static int pmi_notifier(struct notifier_block *nb, unsigned long event, void *data) { struct cpufreq_policy *policy = data; - struct cpufreq_frequency_table *cbe_freqs; + struct cpufreq_frequency_table *cbe_freqs = policy->freq_table; u8 node; /* Should this really be called for CPUFREQ_ADJUST and CPUFREQ_NOTIFY @@ -103,7 +103,6 @@ static int pmi_notifier(struct notifier_block *nb, if (event == CPUFREQ_START) return 0; - cbe_freqs = cpufreq_frequency_get_table(policy->cpu); node = cbe_cpu_to_node(policy->cpu); pr_debug("got notified, event=%lu, node=%u\n", event, node); diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c index ae8eaed77b70..7b596fa38ad2 100644 --- a/drivers/cpufreq/s3c24xx-cpufreq.c +++ b/drivers/cpufreq/s3c24xx-cpufreq.c @@ -293,12 +293,8 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, __func__, policy, target_freq, relation); if (ftab) { - if (cpufreq_frequency_table_target(policy, ftab, - target_freq, relation, - &index)) { - s3c_freq_dbg("%s: table failed\n", __func__); - return -EINVAL; - } + index = cpufreq_frequency_table_target(policy, target_freq, + relation); s3c_freq_dbg("%s: adjust %d to entry %d (%u)\n", __func__, target_freq, index, ftab[index].frequency); @@ -315,7 +311,6 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, pll = NULL; } else { struct cpufreq_policy tmp_policy; - int ret; /* we keep the cpu pll table in Hz, to ensure we get an * accurate value for the PLL output. 
*/ @@ -323,20 +318,14 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, tmp_policy.min = policy->min * 1000; tmp_policy.max = policy->max * 1000; tmp_policy.cpu = policy->cpu; + tmp_policy.freq_table = pll_reg; - /* cpufreq_frequency_table_target uses a pointer to 'index' - * which is the number of the table entry, not the value of + /* cpufreq_frequency_table_target returns the index + * of the table entry, not the value of * the table entry's index field. */ - ret = cpufreq_frequency_table_target(&tmp_policy, pll_reg, - target_freq, relation, - &index); - - if (ret < 0) { - pr_err("%s: no PLL available\n", __func__); - goto err_notpossible; - } - + index = cpufreq_frequency_table_target(&tmp_policy, target_freq, + relation); pll = pll_reg + index; s3c_freq_dbg("%s: target %u => %u\n", @@ -346,10 +335,6 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, } return s3c_cpufreq_settarget(policy, target_freq, pll); - - err_notpossible: - pr_err("no compatible settings for %d\n", target_freq); - return -EINVAL; } struct clk *s3c_cpufreq_clk_get(struct device *dev, const char *name) @@ -571,11 +556,7 @@ static int s3c_cpufreq_build_freq(void) { int size, ret; - if (!cpu_cur.info->calc_freqtable) - return -EINVAL; - kfree(ftab); - ftab = NULL; size = cpu_cur.info->calc_freqtable(&cpu_cur, NULL, 0); size++; diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 06d85917b6d5..9e07588ea9f5 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -246,12 +246,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index) new_freq = s5pv210_freq_table[index].frequency; /* Finding current running level index */ - if (cpufreq_frequency_table_target(policy, s5pv210_freq_table, - old_freq, CPUFREQ_RELATION_H, - &priv_index)) { - ret = -EINVAL; - goto exit; - } + priv_index = cpufreq_table_find_index_h(policy, old_freq); arm_volt = dvs_conf[index].arm_volt; int_volt = dvs_conf[index].int_volt; diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index e342565e8715..4ba3d3fe142f 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -36,26 +36,12 @@ static int arm_enter_idle_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, int idx) { - int ret; - - if (!idx) { - cpu_do_idle(); - return idx; - } - - ret = cpu_pm_enter(); - if (!ret) { - /* - * Pass idle state index to cpu_suspend which in turn will - * call the CPU ops suspend protocol with idle index as a - * parameter. - */ - ret = arm_cpuidle_suspend(idx); - - cpu_pm_exit(); - } - - return ret ? -1 : idx; + /* + * Pass idle state index to arm_cpuidle_suspend which in turn + * will call the CPU ops suspend protocol with idle index as a + * parameter. 
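+ * The macro also folds in the idx == 0 cpu_do_idle() shortcut and the cpu_pm_enter()/cpu_pm_exit() bracketing that were open-coded above.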
+ */ + return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx); } static struct cpuidle_driver arm_idle_driver = { diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index a4d0059e232c..c73207abb5a4 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -173,7 +173,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, struct cpuidle_state *target_state = &drv->states[index]; bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP); - u64 time_start, time_end; + ktime_t time_start, time_end; s64 diff; /* @@ -195,13 +195,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, sched_idle_set_state(target_state); trace_cpu_idle_rcuidle(index, dev->cpu); - time_start = local_clock(); + time_start = ns_to_ktime(local_clock()); stop_critical_timings(); entered_state = target_state->enter(dev, drv, index); start_critical_timings(); - time_end = local_clock(); + time_end = ns_to_ktime(local_clock()); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); /* The cpu is no longer idle or about to enter idle. */ @@ -217,11 +217,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, if (!cpuidle_state_is_coupled(drv, index)) local_irq_enable(); - /* - * local_clock() returns the time in nanosecond, let's shift - * by 10 (divide by 1024) to have microsecond based time. - */ - diff = (time_end - time_start) >> 10; + diff = ktime_us_delta(time_end, time_start); if (diff > INT_MAX) diff = INT_MAX; diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index d77ba2f12242..1af94e2d1a25 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -159,6 +159,19 @@ config CRYPTO_GHASH_S390 It is available as of z196. +config CRYPTO_CRC32_S390 + tristate "CRC-32 algorithms" + depends on S390 + select CRYPTO_HASH + select CRC32 + help + Select this option if you want to use hardware accelerated + implementations of CRC algorithms. With this option, you + can optimize the computation of CRC-32 (IEEE 802.3 Ethernet) + and CRC-32C (Castagnoli). + + It is available with IBM z13 or later. + config CRYPTO_DEV_MV_CESA tristate "Marvell's Cryptographic Engine" depends on PLAT_ORION diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index 95b73968cf72..10db7df366c8 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -588,11 +588,6 @@ static int bfin_crypto_crc_probe(struct platform_device *pdev) crypto_init_queue(&crc->queue, CRC_CCRYPTO_QUEUE_LENGTH); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) { - dev_err(&pdev->dev, "Cannot get IORESOURCE_MEM\n"); - return -ENOENT; - } - crc->regs = devm_ioremap_resource(dev, res); if (IS_ERR((void *)crc->regs)) { dev_err(&pdev->dev, "Cannot map CRC IO\n"); diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 5652a53415dc..64bf3024b680 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -1,6 +1,6 @@ config CRYPTO_DEV_FSL_CAAM tristate "Freescale CAAM-Multicore driver backend" - depends on FSL_SOC || ARCH_MXC + depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE help Enables the driver module for Freescale's Cryptographic Accelerator and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). @@ -99,6 +99,18 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API To compile this as a module, choose M here: the module will be called caamhash. 
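The cpuidle_enter_state() hunk above replaces the old shift-by-10 conversion with exact ktime arithmetic. A minimal sketch of the resulting measurement pattern, assuming a hypothetical enter_state() callback in place of the driver's ->enter() hook (the other calls are the kernel APIs used above):

	int entered_state;
	ktime_t time_start, time_end;
	s64 diff;

	time_start = ns_to_ktime(local_clock());
	entered_state = enter_state(dev, drv, index);	/* hypothetical ->enter() */
	time_end = ns_to_ktime(local_clock());

	/* exact microseconds instead of the old (ns >> 10) approximation */
	diff = ktime_us_delta(time_end, time_start);
	if (diff > INT_MAX)
		diff = INT_MAX;
	dev->last_residency = (int)diff;

Dividing by 1024 instead of 1000 under-reported idle residency by about 2.3%; ktime_us_delta() removes that bias.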
+config CRYPTO_DEV_FSL_CAAM_PKC_API + tristate "Register public key cryptography implementations with Crypto API" + depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + default y + select CRYPTO_RSA + help + Selecting this will allow SEC Public key support for RSA. + Supported cryptographic primitives: encryption, decryption, + signature and verification. + To compile this as a module, choose M here: the module + will be called caam_pkc. + config CRYPTO_DEV_FSL_CAAM_RNG_API tristate "Register caam device for hwrng API" depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR @@ -116,10 +128,6 @@ config CRYPTO_DEV_FSL_CAAM_IMX def_bool SOC_IMX6 || SOC_IMX7D depends on CRYPTO_DEV_FSL_CAAM -config CRYPTO_DEV_FSL_CAAM_LE - def_bool CRYPTO_DEV_FSL_CAAM_IMX || SOC_LS1021A - depends on CRYPTO_DEV_FSL_CAAM - config CRYPTO_DEV_FSL_CAAM_DEBUG bool "Enable debug output in CAAM driver" depends on CRYPTO_DEV_FSL_CAAM diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 550758a333e7..08bf5515ae8a 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -2,7 +2,7 @@ # Makefile for the CAAM backend and dependent components # ifeq ($(CONFIG_CRYPTO_DEV_FSL_CAAM_DEBUG), y) - EXTRA_CFLAGS := -DDEBUG + ccflags-y := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o @@ -10,6 +10,8 @@ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_JR) += caam_jr.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o +obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caam_pkc.o caam-objs := ctrl.o caam_jr-objs := jr.o key_gen.o error.o +caam_pkc-y := caampkc.o pkc_desc.o diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 5845d4a08797..f1ecc8df8d41 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -847,7 +847,7 @@ static int ahash_update_ctx(struct ahash_request *req) *next_buflen, 0); } else { (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= - SEC4_SG_LEN_FIN; + cpu_to_caam32(SEC4_SG_LEN_FIN); } state->current_buf = !state->current_buf; @@ -949,7 +949,8 @@ static int ahash_final_ctx(struct ahash_request *req) state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, buf, state->buf_dma, buflen, last_buflen); - (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= SEC4_SG_LEN_FIN; + (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= + cpu_to_caam32(SEC4_SG_LEN_FIN); edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c new file mode 100644 index 000000000000..851015e652b8 --- /dev/null +++ b/drivers/crypto/caam/caampkc.c @@ -0,0 +1,607 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC, so the Job Descriptor must carry + * all the desired key parameters, input and output pointers.
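+ * Each request therefore allocates its own extended descriptor (struct rsa_edesc) and submits it to a Job Ring via caam_jr_enqueue().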
+ */ +#include "compat.h" +#include "regs.h" +#include "intern.h" +#include "jr.h" +#include "error.h" +#include "desc_constr.h" +#include "sg_sw_sec4.h" +#include "caampkc.h" + +#define DESC_RSA_PUB_LEN (2 * CAAM_CMD_SZ + sizeof(struct rsa_pub_pdb)) +#define DESC_RSA_PRIV_F1_LEN (2 * CAAM_CMD_SZ + \ + sizeof(struct rsa_priv_f1_pdb)) + +static void rsa_io_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + dma_unmap_sg(dev, req->dst, edesc->dst_nents, DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); + + if (edesc->sec4_sg_bytes) + dma_unmap_single(dev, edesc->sec4_sg_dma, edesc->sec4_sg_bytes, + DMA_TO_DEVICE); +} + +static void rsa_pub_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct rsa_pub_pdb *pdb = &edesc->pdb.pub; + + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->e_dma, key->e_sz, DMA_TO_DEVICE); +} + +static void rsa_priv_f1_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; + + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE); +} + +/* RSA Job Completion handler */ +static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context) +{ + struct akcipher_request *req = context; + struct rsa_edesc *edesc; + + if (err) + caam_jr_strstatus(dev, err); + + edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + + rsa_pub_unmap(dev, edesc, req); + rsa_io_unmap(dev, edesc, req); + kfree(edesc); + + akcipher_request_complete(req, err); +} + +static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err, + void *context) +{ + struct akcipher_request *req = context; + struct rsa_edesc *edesc; + + if (err) + caam_jr_strstatus(dev, err); + + edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + + rsa_priv_f1_unmap(dev, edesc, req); + rsa_io_unmap(dev, edesc, req); + kfree(edesc); + + akcipher_request_complete(req, err); +} + +static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, + size_t desclen) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *dev = ctx->dev; + struct rsa_edesc *edesc; + gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC; + int sgc; + int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; + int src_nents, dst_nents; + + src_nents = sg_nents_for_len(req->src, req->src_len); + dst_nents = sg_nents_for_len(req->dst, req->dst_len); + + if (src_nents > 1) + sec4_sg_len = src_nents; + if (dst_nents > 1) + sec4_sg_len += dst_nents; + + sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); + + /* allocate space for base edesc, hw desc commands and link tables */ + edesc = kzalloc(sizeof(*edesc) + desclen + sec4_sg_bytes, + GFP_DMA | flags); + if (!edesc) + return ERR_PTR(-ENOMEM); + + sgc = dma_map_sg(dev, req->src, src_nents, DMA_TO_DEVICE); + if (unlikely(!sgc)) { + dev_err(dev, "unable to map source\n"); + goto src_fail; + } + + sgc = dma_map_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE); + if (unlikely(!sgc)) { + dev_err(dev, "unable to map destination\n"); + goto dst_fail; + } + + edesc->sec4_sg = (void *)edesc + sizeof(*edesc) + desclen; + + sec4_sg_index = 0; + if (src_nents > 1) { + sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0); + sec4_sg_index += src_nents; + } + if (dst_nents > 1) + sg_to_sec4_sg_last(req->dst, dst_nents, + edesc->sec4_sg + sec4_sg_index, 0); + + /* Save nents for later use in Job Descriptor */ + edesc->src_nents = src_nents; + edesc->dst_nents = dst_nents; + + if (!sec4_sg_bytes) + return edesc; + + edesc->sec4_sg_dma = dma_map_single(dev, edesc->sec4_sg, + sec4_sg_bytes, DMA_TO_DEVICE); + if (dma_mapping_error(dev, edesc->sec4_sg_dma)) { + dev_err(dev, "unable to map S/G table\n"); + goto sec4_sg_fail; + } + + edesc->sec4_sg_bytes = sec4_sg_bytes; + + return edesc; + +sec4_sg_fail: + dma_unmap_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE); +dst_fail: + dma_unmap_sg(dev, req->src, src_nents, DMA_TO_DEVICE); +src_fail: + kfree(edesc); + return ERR_PTR(-ENOMEM); +} + +static int set_rsa_pub_pdb(struct akcipher_request *req, + struct rsa_edesc *edesc) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *dev = ctx->dev; + struct rsa_pub_pdb *pdb = &edesc->pdb.pub; + int sec4_sg_index = 0; + + pdb->n_dma = dma_map_single(dev, key->n, key->n_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->n_dma)) { + dev_err(dev, "Unable to map RSA modulus memory\n"); + return -ENOMEM; + } + + pdb->e_dma = dma_map_single(dev, key->e, key->e_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->e_dma)) { + dev_err(dev, "Unable to map RSA public exponent memory\n"); + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + return -ENOMEM; + } + + if (edesc->src_nents > 1) { + pdb->sgf |= RSA_PDB_SGF_F; + pdb->f_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + } else { + pdb->f_dma = sg_dma_address(req->src); + } + + if (edesc->dst_nents > 1) { + pdb->sgf |= RSA_PDB_SGF_G; + pdb->g_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); + } else { + pdb->g_dma = sg_dma_address(req->dst); + } + + pdb->sgf |= (key->e_sz << RSA_PDB_E_SHIFT) | key->n_sz; + pdb->f_len = req->src_len; + + return 0; +} + +static int set_rsa_priv_f1_pdb(struct akcipher_request *req, + struct rsa_edesc *edesc) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *dev = ctx->dev; + struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; + int sec4_sg_index = 0; + + pdb->n_dma = dma_map_single(dev, key->n, key->n_sz, 
DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->n_dma)) { + dev_err(dev, "Unable to map modulus memory\n"); + return -ENOMEM; + } + + pdb->d_dma = dma_map_single(dev, key->d, key->d_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->d_dma)) { + dev_err(dev, "Unable to map RSA private exponent memory\n"); + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + return -ENOMEM; + } + + if (edesc->src_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_G; + pdb->g_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + } else { + pdb->g_dma = sg_dma_address(req->src); + } + + if (edesc->dst_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_F; + pdb->f_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); + } else { + pdb->f_dma = sg_dma_address(req->dst); + } + + pdb->sgf |= (key->d_sz << RSA_PDB_D_SHIFT) | key->n_sz; + + return 0; +} + +static int caam_rsa_enc(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *jrdev = ctx->dev; + struct rsa_edesc *edesc; + int ret; + + if (unlikely(!key->n || !key->e)) + return -EINVAL; + + if (req->dst_len < key->n_sz) { + req->dst_len = key->n_sz; + dev_err(jrdev, "Output buffer length less than parameter n\n"); + return -EOVERFLOW; + } + + /* Allocate extended descriptor */ + edesc = rsa_edesc_alloc(req, DESC_RSA_PUB_LEN); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Set RSA Encrypt Protocol Data Block */ + ret = set_rsa_pub_pdb(req, edesc); + if (ret) + goto init_fail; + + /* Initialize Job Descriptor */ + init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub); + + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done, req); + if (!ret) + return -EINPROGRESS; + + rsa_pub_unmap(jrdev, edesc, req); + +init_fail: + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + return ret; +} + +static int caam_rsa_dec(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *jrdev = ctx->dev; + struct rsa_edesc *edesc; + int ret; + + if (unlikely(!key->n || !key->d)) + return -EINVAL; + + if (req->dst_len < key->n_sz) { + req->dst_len = key->n_sz; + dev_err(jrdev, "Output buffer length less than parameter n\n"); + return -EOVERFLOW; + } + + /* Allocate extended descriptor */ + edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F1_LEN); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Set RSA Decrypt Protocol Data Block - Private Key Form #1 */ + ret = set_rsa_priv_f1_pdb(req, edesc); + if (ret) + goto init_fail; + + /* Initialize Job Descriptor */ + init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1); + + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f1_done, req); + if (!ret) + return -EINPROGRESS; + + rsa_priv_f1_unmap(jrdev, edesc, req); + +init_fail: + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + return ret; +} + +static void caam_rsa_free_key(struct caam_rsa_key *key) +{ + kzfree(key->d); + kfree(key->e); + kfree(key->n); + key->d = NULL; + key->e = NULL; + key->n = NULL; + key->d_sz = 0; + key->e_sz = 0; + key->n_sz = 0; +} + +/** + * caam_read_raw_data - Read a raw byte stream as a positive integer. + * The function skips the buffer's leading zeros, copies the remaining data + * to a buffer allocated in the GFP_DMA | GFP_KERNEL zone and returns + * the address of the new buffer.
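+ * The caller owns the returned buffer and must eventually kfree() it (cf. caam_rsa_free_key()).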
+ * + * @buf : The data to read + * @nbytes: The amount of data to read + */ +static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes) +{ + u8 *val; + + while (!*buf && *nbytes) { + buf++; + (*nbytes)--; + } + + val = kzalloc(*nbytes, GFP_DMA | GFP_KERNEL); + if (!val) + return NULL; + + memcpy(val, buf, *nbytes); + + return val; +} + +static int caam_rsa_check_key_length(unsigned int len) +{ + if (len > 4096) + return -EINVAL; + return 0; +} + +static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; + struct caam_rsa_key *rsa_key = &ctx->key; + int ret; + + /* Free the old RSA key if any */ + caam_rsa_free_key(rsa_key); + + ret = rsa_parse_pub_key(&raw_key, key, keylen); + if (ret) + return ret; + + /* Copy key in DMA zone */ + rsa_key->e = kzalloc(raw_key.e_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->e) + goto err; + + /* + * Skip leading zeros and copy the positive integer to a buffer + * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor + * expects a positive integer for the RSA modulus and uses its length as + * decryption output length. + */ + rsa_key->n = caam_read_raw_data(raw_key.n, &raw_key.n_sz); + if (!rsa_key->n) + goto err; + + if (caam_rsa_check_key_length(raw_key.n_sz << 3)) { + caam_rsa_free_key(rsa_key); + return -EINVAL; + } + + rsa_key->e_sz = raw_key.e_sz; + rsa_key->n_sz = raw_key.n_sz; + + memcpy(rsa_key->e, raw_key.e, raw_key.e_sz); + + return 0; +err: + caam_rsa_free_key(rsa_key); + return -ENOMEM; +} + +static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; + struct caam_rsa_key *rsa_key = &ctx->key; + int ret; + + /* Free the old RSA key if any */ + caam_rsa_free_key(rsa_key); + + ret = rsa_parse_priv_key(&raw_key, key, keylen); + if (ret) + return ret; + + /* Copy key in DMA zone */ + rsa_key->d = kzalloc(raw_key.d_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->d) + goto err; + + rsa_key->e = kzalloc(raw_key.e_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->e) + goto err; + + /* + * Skip leading zeros and copy the positive integer to a buffer + * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor + * expects a positive integer for the RSA modulus and uses its length as + * decryption output length. + */ + rsa_key->n = caam_read_raw_data(raw_key.n, &raw_key.n_sz); + if (!rsa_key->n) + goto err; + + if (caam_rsa_check_key_length(raw_key.n_sz << 3)) { + caam_rsa_free_key(rsa_key); + return -EINVAL; + } + + rsa_key->d_sz = raw_key.d_sz; + rsa_key->e_sz = raw_key.e_sz; + rsa_key->n_sz = raw_key.n_sz; + + memcpy(rsa_key->d, raw_key.d, raw_key.d_sz); + memcpy(rsa_key->e, raw_key.e, raw_key.e_sz); + + return 0; + +err: + caam_rsa_free_key(rsa_key); + return -ENOMEM; +} + +static int caam_rsa_max_size(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + + return (key->n) ? 
key->n_sz : -EINVAL; +} + +/* Per-session PKC driver context creation function */ +static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + + ctx->dev = caam_jr_alloc(); + + if (IS_ERR(ctx->dev)) { + dev_err(ctx->dev, "Job Ring Device allocation for transform failed\n"); + return PTR_ERR(ctx->dev); + } + + return 0; +} + +/* Per-session PKC driver context cleanup function */ +static void caam_rsa_exit_tfm(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + + caam_rsa_free_key(key); + caam_jr_free(ctx->dev); +} + +static struct akcipher_alg caam_rsa = { + .encrypt = caam_rsa_enc, + .decrypt = caam_rsa_dec, + .sign = caam_rsa_dec, + .verify = caam_rsa_enc, + .set_pub_key = caam_rsa_set_pub_key, + .set_priv_key = caam_rsa_set_priv_key, + .max_size = caam_rsa_max_size, + .init = caam_rsa_init_tfm, + .exit = caam_rsa_exit_tfm, + .base = { + .cra_name = "rsa", + .cra_driver_name = "rsa-caam", + .cra_priority = 3000, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct caam_rsa_ctx), + }, +}; + +/* Public Key Cryptography module initialization handler */ +static int __init caam_pkc_init(void) +{ + struct device_node *dev_node; + struct platform_device *pdev; + struct device *ctrldev; + struct caam_drv_private *priv; + u32 cha_inst, pk_inst; + int err; + + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); + if (!dev_node) { + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0"); + if (!dev_node) + return -ENODEV; + } + + pdev = of_find_device_by_node(dev_node); + if (!pdev) { + of_node_put(dev_node); + return -ENODEV; + } + + ctrldev = &pdev->dev; + priv = dev_get_drvdata(ctrldev); + of_node_put(dev_node); + + /* + * If priv is NULL, it's probably because the caam driver wasn't + * properly initialized (e.g. RNG4 init failed). Thus, bail out here. + */ + if (!priv) + return -ENODEV; + + /* Determine public key hardware accelerator presence. */ + cha_inst = rd_reg32(&priv->ctrl->perfmon.cha_num_ls); + pk_inst = (cha_inst & CHA_ID_LS_PK_MASK) >> CHA_ID_LS_PK_SHIFT; + + /* Do not register algorithms if PKHA is not present. */ + if (!pk_inst) + return -ENODEV; + + err = crypto_register_akcipher(&caam_rsa); + if (err) + dev_warn(ctrldev, "%s alg registration failed\n", + caam_rsa.base.cra_driver_name); + else + dev_info(ctrldev, "caam pkc algorithms registered in /proc/crypto\n"); + + return err; +} + +static void __exit caam_pkc_exit(void) +{ + crypto_unregister_akcipher(&caam_rsa); +} + +module_init(caam_pkc_init); +module_exit(caam_pkc_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("FSL CAAM support for PKC functions of crypto API"); +MODULE_AUTHOR("Freescale Semiconductor"); diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h new file mode 100644 index 000000000000..f595d159b112 --- /dev/null +++ b/drivers/crypto/caam/caampkc.h @@ -0,0 +1,70 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography descriptors + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC, so the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ + +#ifndef _PKC_DESC_H_ +#define _PKC_DESC_H_ +#include "compat.h" +#include "pdb.h" + +/** + * caam_rsa_key - CAAM RSA key structure. Keys are allocated in DMA zone.
+ * @n : RSA modulus raw byte stream + * @e : RSA public exponent raw byte stream + * @d : RSA private exponent raw byte stream + * @n_sz : length in bytes of RSA modulus n + * @e_sz : length in bytes of RSA public exponent + * @d_sz : length in bytes of RSA private exponent + */ +struct caam_rsa_key { + u8 *n; + u8 *e; + u8 *d; + size_t n_sz; + size_t e_sz; + size_t d_sz; +}; + +/** + * caam_rsa_ctx - per session context. + * @key : RSA key in DMA zone + * @dev : device structure + */ +struct caam_rsa_ctx { + struct caam_rsa_key key; + struct device *dev; +}; + +/** + * rsa_edesc - s/w-extended rsa descriptor + * @src_nents : number of segments in input scatterlist + * @dst_nents : number of segments in output scatterlist + * @sec4_sg_bytes : length of h/w link table + * @sec4_sg_dma : dma address of h/w link table + * @sec4_sg : pointer to h/w link table + * @pdb : specific RSA Protocol Data Block (PDB) + * @hw_desc : descriptor followed by link tables if any + */ +struct rsa_edesc { + int src_nents; + int dst_nents; + int sec4_sg_bytes; + dma_addr_t sec4_sg_dma; + struct sec4_sg_entry *sec4_sg; + union { + struct rsa_pub_pdb pub; + struct rsa_priv_f1_pdb priv_f1; + } pdb; + u32 hw_desc[]; +}; + +/* Descriptor construction primitives. */ +void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb); +void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb); + +#endif diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index b6955ecdfb3f..7149cd2492e0 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -35,8 +35,11 @@ #include #include #include +#include #include #include #include +#include +#include #endif /* !defined(CAAM_COMPAT_H) */ diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 5ad5f3009ae0..0ec112ee5204 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -15,6 +15,9 @@ #include "desc_constr.h" #include "error.h" +bool caam_little_end; +EXPORT_SYMBOL(caam_little_end); + /* * i.MX targets tend to have clock control subsystems that can * enable/disable clocking to our device. 
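The caam_little_end flag exported in the ctrl.c hunk above is the pivot of this series: byte order is no longer chosen at compile time (the deleted CRYPTO_DEV_FSL_CAAM_LE symbol) but detected once at probe time and branched on per access. A condensed sketch of the pattern, mirroring the regs.h helpers that appear further down:

	extern bool caam_little_end;	/* set once in caam_probe() from the CSTA bits */

	static inline u32 cpu_to_caam32(u32 val)
	{
		if (caam_little_end)
			return cpu_to_le32(val);
		else
			return cpu_to_be32(val);
	}

	static inline void wr_reg32(void __iomem *reg, u32 data)
	{
		if (caam_little_end)
			iowrite32(data, reg);
		else
			iowrite32be(data, reg);
	}

This is also why descriptor words and SG-table entries now pass through cpu_to_caam32()/cpu_to_caam_dma() before the hardware reads them (see the caamhash SEC4_SG_LEN_FIN hunks above).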
@@ -106,7 +109,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, if (ctrlpriv->virt_en == 1) { - setbits32(&ctrl->deco_rsr, DECORSR_JR0); + clrsetbits_32(&ctrl->deco_rsr, 0, DECORSR_JR0); while (!(rd_reg32(&ctrl->deco_rsr) & DECORSR_VALID) && --timeout) @@ -115,7 +118,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, timeout = 100000; } - setbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, 0, DECORR_RQD0ENABLE); while (!(rd_reg32(&ctrl->deco_rq) & DECORR_DEN0) && --timeout) @@ -123,12 +126,12 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, if (!timeout) { dev_err(ctrldev, "failed to acquire DECO 0\n"); - clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, DECORR_RQD0ENABLE, 0); return -ENODEV; } for (i = 0; i < desc_len(desc); i++) - wr_reg32(&deco->descbuf[i], *(desc + i)); + wr_reg32(&deco->descbuf[i], caam32_to_cpu(*(desc + i))); flags = DECO_JQCR_WHL; /* @@ -139,7 +142,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, flags |= DECO_JQCR_FOUR; /* Instruct the DECO to execute it */ - setbits32(&deco->jr_ctl_hi, flags); + clrsetbits_32(&deco->jr_ctl_hi, 0, flags); timeout = 10000000; do { @@ -158,10 +161,10 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, DECO_OP_STATUS_HI_ERR_MASK; if (ctrlpriv->virt_en == 1) - clrbits32(&ctrl->deco_rsr, DECORSR_JR0); + clrsetbits_32(&ctrl->deco_rsr, DECORSR_JR0, 0); /* Mark the DECO as free */ - clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, DECORR_RQD0ENABLE, 0); if (!timeout) return -EAGAIN; @@ -349,7 +352,7 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) r4tst = &ctrl->r4tst[0]; /* put RNG4 into program mode */ - setbits32(&r4tst->rtmctl, RTMCTL_PRGM); + clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM); /* * Performance-wise, it does not make sense to @@ -363,7 +366,7 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) >> RTSDCTL_ENT_DLY_SHIFT; if (ent_delay <= val) { /* put RNG4 into run mode */ - clrbits32(&r4tst->rtmctl, RTMCTL_PRGM); + clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, 0); return; } @@ -381,9 +384,9 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) * select raw sampling in both entropy shifter * and statistical checker */ - setbits32(&val, RTMCTL_SAMP_MODE_RAW_ES_SC); + clrsetbits_32(&val, 0, RTMCTL_SAMP_MODE_RAW_ES_SC); /* put RNG4 into run mode */ - clrbits32(&val, RTMCTL_PRGM); + clrsetbits_32(&val, RTMCTL_PRGM, 0); /* write back the control register */ wr_reg32(&r4tst->rtmctl, val); } @@ -406,6 +409,23 @@ int caam_get_era(void) } EXPORT_SYMBOL(caam_get_era); +#ifdef CONFIG_DEBUG_FS +static int caam_debugfs_u64_get(void *data, u64 *val) +{ + *val = caam64_to_cpu(*(u64 *)data); + return 0; +} + +static int caam_debugfs_u32_get(void *data, u64 *val) +{ + *val = caam32_to_cpu(*(u32 *)data); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n"); +#endif + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -504,6 +524,10 @@ static int caam_probe(struct platform_device *pdev) ret = -ENOMEM; goto disable_caam_emi_slow; } + + caam_little_end = !(bool)(rd_reg32(&ctrl->perfmon.status) & + (CSTA_PLEND | CSTA_ALT_PLEND)); + /* Finding the page size for using the CTPR_MS register */ 
comp_params = rd_reg32(&ctrl->perfmon.comp_parms_ms); pg_size = (comp_params & CTPR_MS_PG_SZ_MASK) >> CTPR_MS_PG_SZ_SHIFT; @@ -559,9 +583,9 @@ static int caam_probe(struct platform_device *pdev) } if (ctrlpriv->virt_en == 1) - setbits32(&ctrl->jrstart, JRSTART_JR0_START | - JRSTART_JR1_START | JRSTART_JR2_START | - JRSTART_JR3_START); + clrsetbits_32(&ctrl->jrstart, 0, JRSTART_JR0_START | + JRSTART_JR1_START | JRSTART_JR2_START | + JRSTART_JR3_START); if (sizeof(dma_addr_t) == sizeof(u64)) if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) @@ -693,7 +717,7 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK; /* Enable RDB bit so that RNG works faster */ - setbits32(&ctrl->scfgr, SCFGR_RDBENABLE); + clrsetbits_32(&ctrl->scfgr, 0, SCFGR_RDBENABLE); } /* NOTE: RTIC detection ought to go here, around Si time */ @@ -719,48 +743,59 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->ctl = debugfs_create_dir("ctl", ctrlpriv->dfs_root); /* Controller-level - performance monitor counters */ + ctrlpriv->ctl_rq_dequeued = - debugfs_create_u64("rq_dequeued", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->req_dequeued); + debugfs_create_file("rq_dequeued", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->req_dequeued, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_enc_req = - debugfs_create_u64("ob_rq_encrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_enc_req); + debugfs_create_file("ob_rq_encrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_enc_req, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_dec_req = - debugfs_create_u64("ib_rq_decrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_dec_req); + debugfs_create_file("ib_rq_decrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_dec_req, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_enc_bytes = - debugfs_create_u64("ob_bytes_encrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_enc_bytes); + debugfs_create_file("ob_bytes_encrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_enc_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_prot_bytes = - debugfs_create_u64("ob_bytes_protected", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_prot_bytes); + debugfs_create_file("ob_bytes_protected", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_prot_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_dec_bytes = - debugfs_create_u64("ib_bytes_decrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_dec_bytes); + debugfs_create_file("ib_bytes_decrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_dec_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_valid_bytes = - debugfs_create_u64("ib_bytes_validated", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_valid_bytes); + debugfs_create_file("ib_bytes_validated", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_valid_bytes, + &caam_fops_u64_ro); /* Controller level - global status values */ ctrlpriv->ctl_faultaddr = - debugfs_create_u64("fault_addr", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->faultaddr); + debugfs_create_file("fault_addr", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->faultaddr, + &caam_fops_u32_ro); ctrlpriv->ctl_faultdetail = - debugfs_create_u32("fault_detail", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->faultdetail); + debugfs_create_file("fault_detail", + S_IRUSR | S_IRGRP | S_IROTH, + 
ctrlpriv->ctl, &perfmon->faultdetail, + &caam_fops_u32_ro); ctrlpriv->ctl_faultstatus = - debugfs_create_u32("fault_status", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->status); + debugfs_create_file("fault_status", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->status, + &caam_fops_u32_ro); /* Internal covering keys (useful in non-secure mode only) */ ctrlpriv->ctl_kek_wrap.data = &ctrlpriv->ctrl->kek[0]; diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 1e93c6af2275..26427c11ad87 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -20,19 +20,18 @@ #define SEC4_SG_BPID_MASK 0x000000ff #define SEC4_SG_BPID_SHIFT 16 #define SEC4_SG_LEN_MASK 0x3fffffff /* Excludes EXT and FINAL */ -#define SEC4_SG_OFFS_MASK 0x00001fff +#define SEC4_SG_OFFSET_MASK 0x00001fff struct sec4_sg_entry { -#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX +#if !defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && \ + defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX) u32 rsvd1; dma_addr_t ptr; #else u64 ptr; #endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_IMX */ u32 len; - u8 rsvd2; - u8 buf_pool_id; - u16 offset; + u32 bpid_offset; }; /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */ @@ -454,6 +453,8 @@ struct sec4_sg_entry { #define OP_PCLID_PUBLICKEYPAIR (0x14 << OP_PCLID_SHIFT) #define OP_PCLID_DSASIGN (0x15 << OP_PCLID_SHIFT) #define OP_PCLID_DSAVERIFY (0x16 << OP_PCLID_SHIFT) +#define OP_PCLID_RSAENC_PUBKEY (0x18 << OP_PCLID_SHIFT) +#define OP_PCLID_RSADEC_PRVKEY (0x19 << OP_PCLID_SHIFT) /* Assuming OP_TYPE = OP_TYPE_DECAP_PROTOCOL/ENCAP_PROTOCOL */ #define OP_PCLID_IPSEC (0x01 << OP_PCLID_SHIFT) diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index 98d07de24fc4..d3869b95e7b1 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -5,6 +5,7 @@ */ #include "desc.h" +#include "regs.h" #define IMMEDIATE (1 << 23) #define CAAM_CMD_SZ sizeof(u32) @@ -30,9 +31,11 @@ LDST_SRCDST_WORD_DECOCTRL | \ (LDOFF_ENABLE_AUTO_NFIFO << LDST_OFFSET_SHIFT)) +extern bool caam_little_end; + static inline int desc_len(u32 *desc) { - return *desc & HDR_DESCLEN_MASK; + return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK; } static inline int desc_bytes(void *desc) @@ -52,7 +55,7 @@ static inline void *sh_desc_pdb(u32 *desc) static inline void init_desc(u32 *desc, u32 options) { - *desc = (options | HDR_ONE) + 1; + *desc = cpu_to_caam32((options | HDR_ONE) + 1); } static inline void init_sh_desc(u32 *desc, u32 options) @@ -74,13 +77,21 @@ static inline void init_job_desc(u32 *desc, u32 options) init_desc(desc, CMD_DESC_HDR | options); } +static inline void init_job_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) +{ + u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; + + init_job_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT)) | options); +} + static inline void append_ptr(u32 *desc, dma_addr_t ptr) { dma_addr_t *offset = (dma_addr_t *)desc_end(desc); - *offset = ptr; + *offset = cpu_to_caam_dma(ptr); - (*desc) += CAAM_PTR_SZ / CAAM_CMD_SZ; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + + CAAM_PTR_SZ / CAAM_CMD_SZ); } static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len, @@ -99,16 +110,17 @@ static inline void append_data(u32 *desc, void *data, int len) if (len) /* avoid sparse warning: memcpy with byte count of 0 */ memcpy(offset, data, len); - (*desc) += (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + + (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ); } 
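With the construction primitives now endian-aware, building a job descriptor remains a flat append sequence. A hedged usage sketch (jrdev, key_dma, MY_PROTO_OP, my_done_cb and my_ctx are hypothetical; init_job_desc(), append_ptr(), append_operation(), MAX_CAAM_DESCSIZE and caam_jr_enqueue() are the real API):

	u32 desc[MAX_CAAM_DESCSIZE];	/* max descriptor size, from desc.h */
	int ret;

	init_job_desc(desc, 0);		/* header word stored via cpu_to_caam32() */
	append_ptr(desc, key_dma);	/* pointer stored via cpu_to_caam_dma() */
	append_operation(desc, MY_PROTO_OP);

	/* desc_len()/desc_bytes() convert back, so sizing works on either endianness */
	ret = caam_jr_enqueue(jrdev, desc, my_done_cb, my_ctx);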
static inline void append_cmd(u32 *desc, u32 command) { u32 *cmd = desc_end(desc); - *cmd = command; + *cmd = cpu_to_caam32(command); - (*desc)++; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 1); } #define append_u32 append_cmd @@ -117,16 +129,22 @@ static inline void append_u64(u32 *desc, u64 data) { u32 *offset = desc_end(desc); - *offset = upper_32_bits(data); - *(++offset) = lower_32_bits(data); + /* Only 32-bit alignment is guaranteed in descriptor buffer */ + if (caam_little_end) { + *offset = cpu_to_caam32(lower_32_bits(data)); + *(++offset) = cpu_to_caam32(upper_32_bits(data)); + } else { + *offset = cpu_to_caam32(upper_32_bits(data)); + *(++offset) = cpu_to_caam32(lower_32_bits(data)); + } - (*desc) += 2; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 2); } /* Write command without affecting header, and return pointer to next word */ static inline u32 *write_cmd(u32 *desc, u32 command) { - *desc = command; + *desc = cpu_to_caam32(command); return desc + 1; } @@ -168,14 +186,17 @@ APPEND_CMD_RET(move, MOVE) static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd) { - *jump_cmd = *jump_cmd | (desc_len(desc) - (jump_cmd - desc)); + *jump_cmd = cpu_to_caam32(caam32_to_cpu(*jump_cmd) | + (desc_len(desc) - (jump_cmd - desc))); } static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd) { - *move_cmd &= ~MOVE_OFFSET_MASK; - *move_cmd = *move_cmd | ((desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & - MOVE_OFFSET_MASK); + u32 val = caam32_to_cpu(*move_cmd); + + val &= ~MOVE_OFFSET_MASK; + val |= (desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & MOVE_OFFSET_MASK; + *move_cmd = cpu_to_caam32(val); } #define APPEND_CMD(cmd, op) \ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 5ef4be22eb80..a81f551ac222 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -31,7 +31,7 @@ static int caam_reset_hw_jr(struct device *dev) * mask interrupts since we are going to poll * for reset completion status */ - setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JRCFG_IMSK); /* initiate flush (required prior to reset) */ wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET); @@ -57,7 +57,7 @@ static int caam_reset_hw_jr(struct device *dev) } /* unmask interrupts */ - clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); return 0; } @@ -147,7 +147,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) } /* mask valid interrupts */ - setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JRCFG_IMSK); /* Have valid interrupt at this point, just ACK and trigger */ wr_reg32(&jrp->rregs->jrintstatus, irqstate); @@ -182,7 +182,7 @@ static void caam_jr_dequeue(unsigned long devarg) sw_idx = (tail + i) & (JOBR_DEPTH - 1); if (jrp->outring[hw_idx].desc == - jrp->entinfo[sw_idx].desc_addr_dma) + caam_dma_to_cpu(jrp->entinfo[sw_idx].desc_addr_dma)) break; /* found */ } /* we should never fail to find a matching descriptor */ @@ -200,7 +200,7 @@ static void caam_jr_dequeue(unsigned long devarg) usercall = jrp->entinfo[sw_idx].callbk; userarg = jrp->entinfo[sw_idx].cbkarg; userdesc = jrp->entinfo[sw_idx].desc_addr_virt; - userstatus = jrp->outring[hw_idx].jrstatus; + userstatus = caam32_to_cpu(jrp->outring[hw_idx].jrstatus); /* * Make sure all information from the job has been obtained @@ -236,7 +236,7 @@ static void caam_jr_dequeue(unsigned long devarg) } /* reenable / unmask IRQs */ - clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + 
clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); } /** @@ -330,7 +330,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, int head, tail, desc_size; dma_addr_t desc_dma; - desc_size = (*desc & HDR_JD_LENGTH_MASK) * sizeof(u32); + desc_size = (caam32_to_cpu(*desc) & HDR_JD_LENGTH_MASK) * sizeof(u32); desc_dma = dma_map_single(dev, desc, desc_size, DMA_TO_DEVICE); if (dma_mapping_error(dev, desc_dma)) { dev_err(dev, "caam_jr_enqueue(): can't map jobdesc\n"); @@ -356,7 +356,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, head_entry->cbkarg = areq; head_entry->desc_addr_dma = desc_dma; - jrp->inpring[jrp->inp_ring_write_index] = desc_dma; + jrp->inpring[jrp->inp_ring_write_index] = cpu_to_caam_dma(desc_dma); /* * Guarantee that the descriptor's DMA address has been written to @@ -444,9 +444,9 @@ static int caam_jr_init(struct device *dev) spin_lock_init(&jrp->outlock); /* Select interrupt coalescing parameters */ - setbits32(&jrp->rregs->rconfig_lo, JOBR_INTC | - (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) | - (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT)); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JOBR_INTC | + (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) | + (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT)); return 0; diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h index 3a87c0cf879a..aaa00dd1c601 100644 --- a/drivers/crypto/caam/pdb.h +++ b/drivers/crypto/caam/pdb.h @@ -1,18 +1,19 @@ /* * CAAM Protocol Data Block (PDB) definition header file * - * Copyright 2008-2012 Freescale Semiconductor, Inc. + * Copyright 2008-2016 Freescale Semiconductor, Inc. * */ #ifndef CAAM_PDB_H #define CAAM_PDB_H +#include "compat.h" /* * PDB- IPSec ESP Header Modification Options */ -#define PDBHMO_ESP_DECAP_SHIFT 12 -#define PDBHMO_ESP_ENCAP_SHIFT 4 +#define PDBHMO_ESP_DECAP_SHIFT 28 +#define PDBHMO_ESP_ENCAP_SHIFT 28 /* * Encap and Decap - Decrement TTL (Hop Limit) - Based on the value of the * Options Byte IP version (IPvsn) field: @@ -32,12 +33,23 @@ */ #define PDBHMO_ESP_DFBIT (0x04 << PDBHMO_ESP_ENCAP_SHIFT) +#define PDBNH_ESP_ENCAP_SHIFT 16 +#define PDBNH_ESP_ENCAP_MASK (0xff << PDBNH_ESP_ENCAP_SHIFT) + +#define PDBHDRLEN_ESP_DECAP_SHIFT 16 +#define PDBHDRLEN_MASK (0x0fff << PDBHDRLEN_ESP_DECAP_SHIFT) + +#define PDB_NH_OFFSET_SHIFT 8 +#define PDB_NH_OFFSET_MASK (0xff << PDB_NH_OFFSET_SHIFT) + /* * PDB - IPSec ESP Encap/Decap Options */ #define PDBOPTS_ESP_ARSNONE 0x00 /* no antireplay window */ #define PDBOPTS_ESP_ARS32 0x40 /* 32-entry antireplay window */ +#define PDBOPTS_ESP_ARS128 0x80 /* 128-entry antireplay window */ #define PDBOPTS_ESP_ARS64 0xc0 /* 64-entry antireplay window */ +#define PDBOPTS_ESP_ARS_MASK 0xc0 /* antireplay window mask */ #define PDBOPTS_ESP_IVSRC 0x20 /* IV comes from internal random gen */ #define PDBOPTS_ESP_ESN 0x10 /* extended sequence included */ #define PDBOPTS_ESP_OUTFMT 0x08 /* output only decapsulation (decap) */ @@ -54,35 +66,73 @@ /* * General IPSec encap/decap PDB definitions */ + +/** + * ipsec_encap_cbc - PDB part for IPsec CBC encapsulation + * @iv: 16-byte array initialization vector + */ struct ipsec_encap_cbc { - u32 iv[4]; + u8 iv[16]; }; +/** + * ipsec_encap_ctr - PDB part for IPsec CTR encapsulation + * @ctr_nonce: 4-byte array nonce + * @ctr_initial: initial count constant + * @iv: initialization vector + */ struct ipsec_encap_ctr { - u32 ctr_nonce; + u8 ctr_nonce[4]; u32 ctr_initial; - u32 iv[2]; + u64 iv; }; +/** + * ipsec_encap_ccm - PDB part for IPsec CCM encapsulation + * @salt: 3-byte array salt (lower 24 bits) + * 
@ccm_opt: CCM algorithm options - MSB-LSB description: + * b0_flags (8b) - CCM B0; use 0x5B for 8-byte ICV, 0x6B for 12-byte ICV, + * 0x7B for 16-byte ICV (cf. RFC4309, RFC3610) + * ctr_flags (8b) - counter flags; constant equal to 0x3 + * ctr_initial (16b) - initial count constant + * @iv: initialization vector + */ struct ipsec_encap_ccm { - u32 salt; /* lower 24 bits */ - u8 b0_flags; - u8 ctr_flags; - u16 ctr_initial; - u32 iv[2]; + u8 salt[4]; + u32 ccm_opt; + u64 iv; }; +/** + * ipsec_encap_gcm - PDB part for IPsec GCM encapsulation + * @salt: 3-byte array salt (lower 24 bits) + * @rsvd: reserved, do not use + * @iv: initialization vector + */ struct ipsec_encap_gcm { - u32 salt; /* lower 24 bits */ + u8 salt[4]; u32 rsvd1; - u32 iv[2]; + u64 iv; }; +/** + * ipsec_encap_pdb - PDB for IPsec encapsulation + * @options: MSB-LSB description + * hmo (header manipulation options) - 4b + * reserved - 4b + * next header - 8b + * next header offset - 8b + * option flags (depend on selected algorithm) - 8b + * @seq_num_ext_hi: (optional) IPsec Extended Sequence Number (ESN) + * @seq_num: IPsec sequence number + * @spi: IPsec SPI (Security Parameters Index) + * @ip_hdr_len: optional IP Header length (in bytes) + * reserved - 16b + * Opt. IP Hdr Len - 16b + * @ip_hdr: optional IP Header content + */ struct ipsec_encap_pdb { - u8 hmo_rsvd; - u8 ip_nh; - u8 ip_nh_offset; - u8 options; + u32 options; u32 seq_num_ext_hi; u32 seq_num; union { @@ -92,36 +142,65 @@ struct ipsec_encap_pdb { struct ipsec_encap_gcm gcm; }; u32 spi; - u16 rsvd1; - u16 ip_hdr_len; - u32 ip_hdr[0]; /* optional IP Header content */ + u32 ip_hdr_len; + u32 ip_hdr[0]; }; +/** + * ipsec_decap_cbc - PDB part for IPsec CBC decapsulation + * @rsvd: reserved, do not use + */ struct ipsec_decap_cbc { u32 rsvd[2]; }; +/** + * ipsec_decap_ctr - PDB part for IPsec CTR decapsulation + * @ctr_nonce: 4-byte array nonce + * @ctr_initial: initial count constant + */ struct ipsec_decap_ctr { - u32 salt; + u8 ctr_nonce[4]; u32 ctr_initial; }; +/** + * ipsec_decap_ccm - PDB part for IPsec CCM decapsulation + * @salt: 3-byte salt (lower 24 bits) + * @ccm_opt: CCM algorithm options - MSB-LSB description: + * b0_flags (8b) - CCM B0; use 0x5B for 8-byte ICV, 0x6B for 12-byte ICV, + * 0x7B for 16-byte ICV (cf. 
RFC4309, RFC3610) + * ctr_flags (8b) - counter flags; constant equal to 0x3 + * ctr_initial (16b) - initial count constant + */ struct ipsec_decap_ccm { - u32 salt; - u8 iv_flags; - u8 ctr_flags; - u16 ctr_initial; + u8 salt[4]; + u32 ccm_opt; }; +/** + * ipsec_decap_gcm - PDB part for IPsec GCM decapsulation + * @salt: 4-byte salt + * @rsvd: reserved, do not use + */ struct ipsec_decap_gcm { - u32 salt; + u8 salt[4]; u32 resvd; }; +/** + * ipsec_decap_pdb - PDB for IPsec decapsulation + * @options: MSB-LSB description + * hmo (header manipulation options) - 4b + * IP header length - 12b + * next header offset - 8b + * option flags (depend on selected algorithm) - 8b + * @seq_num_ext_hi: (optional) IPsec Extended Sequence Number (ESN) + * @seq_num: IPsec sequence number + * @anti_replay: Anti-replay window; size depends on ARS (option flags) + */ struct ipsec_decap_pdb { - u16 hmo_ip_hdr_len; - u8 ip_nh_offset; - u8 options; + u32 options; union { struct ipsec_decap_cbc cbc; struct ipsec_decap_ctr ctr; @@ -130,8 +209,7 @@ struct ipsec_decap_pdb { }; u32 seq_num_ext_hi; u32 seq_num; - u32 anti_replay[2]; - u32 end_index[0]; + __be32 anti_replay[4]; }; /* @@ -399,4 +477,52 @@ struct dsa_verify_pdb { u8 *ab; /* only used if ECC processing */ }; +/* RSA Protocol Data Block */ +#define RSA_PDB_SGF_SHIFT 28 +#define RSA_PDB_E_SHIFT 12 +#define RSA_PDB_E_MASK (0xFFF << RSA_PDB_E_SHIFT) +#define RSA_PDB_D_SHIFT 12 +#define RSA_PDB_D_MASK (0xFFF << RSA_PDB_D_SHIFT) + +#define RSA_PDB_SGF_F (0x8 << RSA_PDB_SGF_SHIFT) +#define RSA_PDB_SGF_G (0x4 << RSA_PDB_SGF_SHIFT) +#define RSA_PRIV_PDB_SGF_F (0x4 << RSA_PDB_SGF_SHIFT) +#define RSA_PRIV_PDB_SGF_G (0x8 << RSA_PDB_SGF_SHIFT) + +#define RSA_PRIV_KEY_FRM_1 0 + +/** + * RSA Encrypt Protocol Data Block + * @sgf: scatter-gather field + * @f_dma: dma address of input data + * @g_dma: dma address of encrypted output data + * @n_dma: dma address of RSA modulus + * @e_dma: dma address of RSA public exponent + * @f_len: length in octets of the input data + */ +struct rsa_pub_pdb { + u32 sgf; + dma_addr_t f_dma; + dma_addr_t g_dma; + dma_addr_t n_dma; + dma_addr_t e_dma; + u32 f_len; +} __packed; + +/** + * RSA Decrypt PDB - Private Key Form #1 + * @sgf: scatter-gather field + * @g_dma: dma address of encrypted input data + * @f_dma: dma address of output data + * @n_dma: dma address of RSA modulus + * @d_dma: dma address of RSA private exponent + */ +struct rsa_priv_f1_pdb { + u32 sgf; + dma_addr_t g_dma; + dma_addr_t f_dma; + dma_addr_t n_dma; + dma_addr_t d_dma; +} __packed; + #endif diff --git a/drivers/crypto/caam/pkc_desc.c b/drivers/crypto/caam/pkc_desc.c new file mode 100644 index 000000000000..4e4183e615ea --- /dev/null +++ b/drivers/crypto/caam/pkc_desc.c @@ -0,0 +1,36 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography descriptors + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC, so the Job Descriptor must carry + * all the desired key parameters, input and output pointers.
+ */ +#include "caampkc.h" +#include "desc_constr.h" + +/* Descriptor for RSA Public operation */ +void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb) +{ + init_job_desc_pdb(desc, 0, sizeof(*pdb)); + append_cmd(desc, pdb->sgf); + append_ptr(desc, pdb->f_dma); + append_ptr(desc, pdb->g_dma); + append_ptr(desc, pdb->n_dma); + append_ptr(desc, pdb->e_dma); + append_cmd(desc, pdb->f_len); + append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSAENC_PUBKEY); +} + +/* Descriptor for RSA Private operation - Private Key Form #1 */ +void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb) +{ + init_job_desc_pdb(desc, 0, sizeof(*pdb)); + append_cmd(desc, pdb->sgf); + append_ptr(desc, pdb->g_dma); + append_ptr(desc, pdb->f_dma); + append_ptr(desc, pdb->n_dma); + append_ptr(desc, pdb->d_dma); + append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY | + RSA_PRIV_KEY_FRM_1); +} diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 0ba9c40597dc..b3c5016f6458 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -8,6 +8,7 @@ #define REGS_H #include +#include #include /* @@ -65,46 +66,56 @@ * */ -#ifdef CONFIG_ARM -/* These are common macros for Power, put here for ARM */ -#define setbits32(_addr, _v) writel((readl(_addr) | (_v)), (_addr)) -#define clrbits32(_addr, _v) writel((readl(_addr) & ~(_v)), (_addr)) +extern bool caam_little_end; -#define out_arch(type, endian, a, v) __raw_write##type(cpu_to_##endian(v), a) -#define in_arch(type, endian, a) endian##_to_cpu(__raw_read##type(a)) +#define caam_to_cpu(len) \ +static inline u##len caam##len ## _to_cpu(u##len val) \ +{ \ + if (caam_little_end) \ + return le##len ## _to_cpu(val); \ + else \ + return be##len ## _to_cpu(val); \ +} -#define out_le32(a, v) out_arch(l, le32, a, v) -#define in_le32(a) in_arch(l, le32, a) +#define cpu_to_caam(len) \ +static inline u##len cpu_to_caam##len(u##len val) \ +{ \ + if (caam_little_end) \ + return cpu_to_le##len(val); \ + else \ + return cpu_to_be##len(val); \ +} -#define out_be32(a, v) out_arch(l, be32, a, v) -#define in_be32(a) in_arch(l, be32, a) +caam_to_cpu(16) +caam_to_cpu(32) +caam_to_cpu(64) +cpu_to_caam(16) +cpu_to_caam(32) +cpu_to_caam(64) -#define clrsetbits(type, addr, clear, set) \ - out_##type((addr), (in_##type(addr) & ~(clear)) | (set)) +static inline void wr_reg32(void __iomem *reg, u32 data) +{ + if (caam_little_end) + iowrite32(data, reg); + else + iowrite32be(data, reg); +} -#define clrsetbits_be32(addr, clear, set) clrsetbits(be32, addr, clear, set) -#define clrsetbits_le32(addr, clear, set) clrsetbits(le32, addr, clear, set) -#endif +static inline u32 rd_reg32(void __iomem *reg) +{ + if (caam_little_end) + return ioread32(reg); -#ifdef __BIG_ENDIAN -#define wr_reg32(reg, data) out_be32(reg, data) -#define rd_reg32(reg) in_be32(reg) -#define clrsetbits_32(addr, clear, set) clrsetbits_be32(addr, clear, set) -#ifdef CONFIG_64BIT -#define wr_reg64(reg, data) out_be64(reg, data) -#define rd_reg64(reg) in_be64(reg) -#endif -#else -#ifdef __LITTLE_ENDIAN -#define wr_reg32(reg, data) __raw_writel(data, reg) -#define rd_reg32(reg) __raw_readl(reg) -#define clrsetbits_32(addr, clear, set) clrsetbits_le32(addr, clear, set) -#ifdef CONFIG_64BIT -#define wr_reg64(reg, data) __raw_writeq(data, reg) -#define rd_reg64(reg) __raw_readq(reg) -#endif -#endif -#endif + return ioread32be(reg); +} + +static inline void clrsetbits_32(void __iomem *reg, u32 clear, u32 set) +{ + if (caam_little_end) + iowrite32((ioread32(reg) & ~clear) | set, 
reg); + else + iowrite32be((ioread32be(reg) & ~clear) | set, reg); +} /* * The only users of these wr/rd_reg64 functions is the Job Ring (JR). @@ -123,29 +134,67 @@ * base + 0x0000 : least-significant 32 bits * base + 0x0004 : most-significant 32 bits */ +#ifdef CONFIG_64BIT +static inline void wr_reg64(void __iomem *reg, u64 data) +{ + if (caam_little_end) + iowrite64(data, reg); + else + iowrite64be(data, reg); +} -#ifndef CONFIG_64BIT -#if !defined(CONFIG_CRYPTO_DEV_FSL_CAAM_LE) || \ - defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX) -#define REG64_MS32(reg) ((u32 __iomem *)(reg)) -#define REG64_LS32(reg) ((u32 __iomem *)(reg) + 1) -#else -#define REG64_MS32(reg) ((u32 __iomem *)(reg) + 1) -#define REG64_LS32(reg) ((u32 __iomem *)(reg)) -#endif - -static inline void wr_reg64(u64 __iomem *reg, u64 data) +static inline u64 rd_reg64(void __iomem *reg) { - wr_reg32(REG64_MS32(reg), data >> 32); - wr_reg32(REG64_LS32(reg), data); + if (caam_little_end) + return ioread64(reg); + else + return ioread64be(reg); } -static inline u64 rd_reg64(u64 __iomem *reg) +#else /* CONFIG_64BIT */ +static inline void wr_reg64(void __iomem *reg, u64 data) { - return ((u64)rd_reg32(REG64_MS32(reg)) << 32 | - (u64)rd_reg32(REG64_LS32(reg))); +#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX + if (caam_little_end) { + wr_reg32((u32 __iomem *)(reg) + 1, data >> 32); + wr_reg32((u32 __iomem *)(reg), data); + } else +#endif + { + wr_reg32((u32 __iomem *)(reg), data >> 32); + wr_reg32((u32 __iomem *)(reg) + 1, data); + } } + +static inline u64 rd_reg64(void __iomem *reg) +{ +#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX + if (caam_little_end) + return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 | + (u64)rd_reg32((u32 __iomem *)(reg))); + else #endif + return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 | + (u64)rd_reg32((u32 __iomem *)(reg) + 1)); +} +#endif /* CONFIG_64BIT */ + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +#ifdef CONFIG_SOC_IMX7D +#define cpu_to_caam_dma(value) \ + (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \ + (u64)cpu_to_caam32(upper_32_bits(value))) +#define caam_dma_to_cpu(value) \ + (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \ + (u64)caam32_to_cpu(upper_32_bits(value))) +#else +#define cpu_to_caam_dma(value) cpu_to_caam64(value) +#define caam_dma_to_cpu(value) caam64_to_cpu(value) +#endif /* CONFIG_SOC_IMX7D */ +#else +#define cpu_to_caam_dma(value) cpu_to_caam32(value) +#define caam_dma_to_cpu(value) caam32_to_cpu(value) +#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */ /* * jr_outentry @@ -249,6 +298,8 @@ struct caam_perfmon { u32 faultliodn; /* FALR - Fault Address LIODN */ u32 faultdetail; /* FADR - Fault Addr Detail */ u32 rsvd2; +#define CSTA_PLEND BIT(10) +#define CSTA_ALT_PLEND BIT(18) u32 status; /* CSTA - CAAM Status */ u64 rsvd3; diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 12ec6616e89d..19dc64fede0d 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -5,18 +5,19 @@ * */ +#include "regs.h" + struct sec4_sg_entry; /* * convert single dma address to h/w link table format */ static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, - dma_addr_t dma, u32 len, u32 offset) + dma_addr_t dma, u32 len, u16 offset) { - sec4_sg_ptr->ptr = dma; - sec4_sg_ptr->len = len; - sec4_sg_ptr->buf_pool_id = 0; - sec4_sg_ptr->offset = offset; + sec4_sg_ptr->ptr = cpu_to_caam_dma(dma); + sec4_sg_ptr->len = cpu_to_caam32(len); + sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK); #ifdef DEBUG print_hex_dump(KERN_ERR, 
"sec4_sg_ptr@: ", DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr, @@ -30,7 +31,7 @@ static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, */ static inline struct sec4_sg_entry * sg_to_sec4_sg(struct scatterlist *sg, int sg_count, - struct sec4_sg_entry *sec4_sg_ptr, u32 offset) + struct sec4_sg_entry *sec4_sg_ptr, u16 offset) { while (sg_count) { dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), @@ -48,10 +49,10 @@ sg_to_sec4_sg(struct scatterlist *sg, int sg_count, */ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count, struct sec4_sg_entry *sec4_sg_ptr, - u32 offset) + u16 offset) { sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset); - sec4_sg_ptr->len |= SEC4_SG_LEN_FIN; + sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN); } static inline struct sec4_sg_entry *sg_to_sec4_sg_len( diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 0d0d4529ee36..58a4244b4752 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -14,9 +14,8 @@ #include #include #include -#include -#include #include +#include #include #include "ccp-crypto.h" @@ -110,15 +109,12 @@ static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, ctx->u.aes.key_len = key_len / 2; sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); - return crypto_ablkcipher_setkey(ctx->u.aes.tfm_ablkcipher, key, - key_len); + return crypto_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len); } static int ccp_aes_xts_crypt(struct ablkcipher_request *req, unsigned int encrypt) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); unsigned int unit; @@ -146,14 +142,19 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) || (ctx->u.aes.key_len != AES_KEYSIZE_128)) { + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher); + /* Use the fallback to process the request for any * unsupported unit sizes or key sizes */ - ablkcipher_request_set_tfm(req, ctx->u.aes.tfm_ablkcipher); - ret = (encrypt) ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); - + skcipher_request_set_tfm(subreq, ctx->u.aes.tfm_skcipher); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + ret = encrypt ? 
crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return ret; } @@ -192,23 +193,21 @@ static int ccp_aes_xts_decrypt(struct ablkcipher_request *req) static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_ablkcipher *fallback_tfm; + struct crypto_skcipher *fallback_tfm; ctx->complete = ccp_aes_xts_complete; ctx->u.aes.key_len = 0; - fallback_tfm = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), 0, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + fallback_tfm = crypto_alloc_skcipher("xts(aes)", 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback_tfm)) { - pr_warn("could not load fallback driver %s\n", - crypto_tfm_alg_name(tfm)); + pr_warn("could not load fallback driver xts(aes)\n"); return PTR_ERR(fallback_tfm); } - ctx->u.aes.tfm_ablkcipher = fallback_tfm; + ctx->u.aes.tfm_skcipher = fallback_tfm; - tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx) + - fallback_tfm->base.crt_ablkcipher.reqsize; + tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx); return 0; } @@ -217,9 +216,7 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->u.aes.tfm_ablkcipher) - crypto_free_ablkcipher(ctx->u.aes.tfm_ablkcipher); - ctx->u.aes.tfm_ablkcipher = NULL; + crypto_free_skcipher(ctx->u.aes.tfm_skcipher); } static int ccp_register_aes_xts_alg(struct list_head *head, diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h index a326ec20bfa8..8335b32e815e 100644 --- a/drivers/crypto/ccp/ccp-crypto.h +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -69,7 +68,7 @@ static inline struct ccp_crypto_ahash_alg * /***** AES related defines *****/ struct ccp_aes_ctx { /* Fallback cipher for XTS with unsupported unit sizes */ - struct crypto_ablkcipher *tfm_ablkcipher; + struct crypto_skcipher *tfm_skcipher; /* Cipher used to generate CMAC K1/K2 keys */ struct crypto_cipher *tfm_cipher; diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index e8ef9fd24a16..e373cc6557c6 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -31,22 +31,42 @@ #include "cesa.h" +/* Limit of the crypto queue before reaching the backlog */ +#define CESA_CRYPTO_DEFAULT_MAX_QLEN 128 + static int allhwsupport = !IS_ENABLED(CONFIG_CRYPTO_DEV_MV_CESA); module_param_named(allhwsupport, allhwsupport, int, 0444); MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even it if overlaps with the mv_cesa driver)"); struct mv_cesa_dev *cesa_dev; -static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine) +struct crypto_async_request * +mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine, + struct crypto_async_request **backlog) { - struct crypto_async_request *req, *backlog; + struct crypto_async_request *req; + + *backlog = crypto_get_backlog(&engine->queue); + req = crypto_dequeue_request(&engine->queue); + + if (!req) + return NULL; + + return req; +} + +static void mv_cesa_rearm_engine(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req = NULL, *backlog = NULL; struct mv_cesa_ctx *ctx; - spin_lock_bh(&cesa_dev->lock); - backlog = crypto_get_backlog(&cesa_dev->queue); - req = crypto_dequeue_request(&cesa_dev->queue); - engine->req = req; - spin_unlock_bh(&cesa_dev->lock); + + spin_lock_bh(&engine->lock); + if (!engine->req) { + req = 
mv_cesa_dequeue_req_locked(engine, &backlog); + engine->req = req; + } + spin_unlock_bh(&engine->lock); if (!req) return; @@ -55,8 +75,47 @@ static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine) backlog->complete(backlog, -EINPROGRESS); ctx = crypto_tfm_ctx(req->tfm); - ctx->ops->prepare(req, engine); ctx->ops->step(req); + + return; +} + +static int mv_cesa_std_process(struct mv_cesa_engine *engine, u32 status) +{ + struct crypto_async_request *req; + struct mv_cesa_ctx *ctx; + int res; + + req = engine->req; + ctx = crypto_tfm_ctx(req->tfm); + res = ctx->ops->process(req, status); + + if (res == 0) { + ctx->ops->complete(req); + mv_cesa_engine_enqueue_complete_request(engine, req); + } else if (res == -EINPROGRESS) { + ctx->ops->step(req); + } + + return res; +} + +static int mv_cesa_int_process(struct mv_cesa_engine *engine, u32 status) +{ + if (engine->chain.first && engine->chain.last) + return mv_cesa_tdma_process(engine, status); + + return mv_cesa_std_process(engine, status); +} + +static inline void +mv_cesa_complete_req(struct mv_cesa_ctx *ctx, struct crypto_async_request *req, + int res) +{ + ctx->ops->cleanup(req); + local_bh_disable(); + req->complete(req, res); + local_bh_enable(); } static irqreturn_t mv_cesa_int(int irq, void *priv) @@ -83,49 +142,54 @@ static irqreturn_t mv_cesa_int(int irq, void *priv) writel(~status, engine->regs + CESA_SA_FPGA_INT_STATUS); writel(~status, engine->regs + CESA_SA_INT_STATUS); + /* Process fetched requests */ + res = mv_cesa_int_process(engine, status & mask); ret = IRQ_HANDLED; + spin_lock_bh(&engine->lock); req = engine->req; + if (res != -EINPROGRESS) + engine->req = NULL; spin_unlock_bh(&engine->lock); - if (req) { - ctx = crypto_tfm_ctx(req->tfm); - res = ctx->ops->process(req, status & mask); - if (res != -EINPROGRESS) { - spin_lock_bh(&engine->lock); - engine->req = NULL; - mv_cesa_dequeue_req_unlocked(engine); - spin_unlock_bh(&engine->lock); - ctx->ops->cleanup(req); - local_bh_disable(); - req->complete(req, res); - local_bh_enable(); - } else { - ctx->ops->step(req); - } + + ctx = crypto_tfm_ctx(req->tfm); + + if (res && res != -EINPROGRESS) + mv_cesa_complete_req(ctx, req, res); + + /* Launch the next pending request */ + mv_cesa_rearm_engine(engine); + + /* Iterate over the complete queue */ + while (true) { + req = mv_cesa_engine_dequeue_complete_request(engine); + if (!req) + break; + + mv_cesa_complete_req(ctx, req, 0); } } return ret; } -int mv_cesa_queue_req(struct crypto_async_request *req) +int mv_cesa_queue_req(struct crypto_async_request *req, + struct mv_cesa_req *creq) { int ret; - int i; + struct mv_cesa_engine *engine = creq->engine; + + spin_lock_bh(&engine->lock); + if (mv_cesa_req_get_type(creq) == CESA_DMA_REQ) + mv_cesa_tdma_chain(engine, creq); - spin_lock_bh(&cesa_dev->lock); - ret = crypto_enqueue_request(&cesa_dev->queue, req); - spin_unlock_bh(&cesa_dev->lock); + ret = crypto_enqueue_request(&engine->queue, req); + spin_unlock_bh(&engine->lock); if (ret != -EINPROGRESS) return ret; - for (i = 0; i < cesa_dev->caps->nengines; i++) { - spin_lock_bh(&cesa_dev->engines[i].lock); - if (!cesa_dev->engines[i].req) - mv_cesa_dequeue_req_unlocked(&cesa_dev->engines[i]); - spin_unlock_bh(&cesa_dev->engines[i].lock); - } + mv_cesa_rearm_engine(engine); return -EINPROGRESS; } @@ -309,6 +373,10 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa) if (!dma->padding_pool) return -ENOMEM; + dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0); + if (!dma->iv_pool) + return -ENOMEM; + 
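
Side note: the "cesa_iv" pool created above backs the 16-byte buffers that
the new CESA_TDMA_IV descriptors write the output IV into. A minimal usage
sketch, not taken from the patch -- it assumes a valid struct mv_cesa_dev
*cesa and process context, and example_grab_iv_buf() is a made-up name:

        #include <linux/dmapool.h>

        static int example_grab_iv_buf(struct mv_cesa_dev *cesa)
        {
                dma_addr_t iv_dma;
                u8 *iv;

                /* Returns a CPU pointer plus the DMA address the
                 * engine can use as a descriptor target. */
                iv = dma_pool_alloc(cesa->dma->iv_pool, GFP_KERNEL,
                                    &iv_dma);
                if (!iv)
                        return -ENOMEM;

                /* ... point a TDMA descriptor's dst at iv_dma and
                 * run the chain ... */

                dma_pool_free(cesa->dma->iv_pool, iv, iv_dma);
                return 0;
        }
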
cesa->dma = dma; return 0; @@ -416,7 +484,7 @@ static int mv_cesa_probe(struct platform_device *pdev) return -ENOMEM; spin_lock_init(&cesa->lock); - crypto_init_queue(&cesa->queue, 50); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); cesa->regs = devm_ioremap_resource(dev, res); if (IS_ERR(cesa->regs)) @@ -489,6 +557,10 @@ static int mv_cesa_probe(struct platform_device *pdev) engine); if (ret) goto err_cleanup; + + crypto_init_queue(&engine->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN); + atomic_set(&engine->load, 0); + INIT_LIST_HEAD(&engine->complete_queue); } cesa_dev = cesa; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index 74071e45ada0..e423d33decd4 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -271,10 +271,13 @@ struct mv_cesa_op_ctx { /* TDMA descriptor flags */ #define CESA_TDMA_DST_IN_SRAM BIT(31) #define CESA_TDMA_SRC_IN_SRAM BIT(30) -#define CESA_TDMA_TYPE_MSK GENMASK(29, 0) +#define CESA_TDMA_END_OF_REQ BIT(29) +#define CESA_TDMA_BREAK_CHAIN BIT(28) +#define CESA_TDMA_TYPE_MSK GENMASK(27, 0) #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 +#define CESA_TDMA_IV 3 /** * struct mv_cesa_tdma_desc - TDMA descriptor @@ -390,6 +393,7 @@ struct mv_cesa_dev_dma { struct dma_pool *op_pool; struct dma_pool *cache_pool; struct dma_pool *padding_pool; + struct dma_pool *iv_pool; }; /** @@ -398,7 +402,6 @@ struct mv_cesa_dev_dma { * @regs: device registers * @sram_size: usable SRAM size * @lock: device lock - * @queue: crypto request queue * @engines: array of engines * @dma: dma pools * @@ -410,7 +413,6 @@ struct mv_cesa_dev { struct device *dev; unsigned int sram_size; spinlock_t lock; - struct crypto_queue queue; struct mv_cesa_engine *engines; struct mv_cesa_dev_dma *dma; }; @@ -429,6 +431,11 @@ struct mv_cesa_dev { * @int_mask: interrupt mask cache * @pool: memory pool pointing to the memory region reserved in * SRAM + * @queue: fifo of the pending crypto requests + * @load: engine load counter, useful for load balancing + * @chain: list of the current tdma descriptors being processed + * by this engine. + * @complete_queue: fifo of the processed requests by the engine * * Structure storing CESA engine information. */ @@ -444,23 +451,27 @@ struct mv_cesa_engine { size_t max_req_len; u32 int_mask; struct gen_pool *pool; + struct crypto_queue queue; + atomic_t load; + struct mv_cesa_tdma_chain chain; + struct list_head complete_queue; }; /** * struct mv_cesa_req_ops - CESA request operations - * @prepare: prepare a request to be executed on the specified engine * @process: process a request chunk result (should return 0 if the * operation, -EINPROGRESS if it needs more steps or an error * code) * @step: launch the crypto operation on the next chunk * @cleanup: cleanup the crypto request (release associated data) + * @complete: complete the request, i.e copy result or context from sram when + * needed. 
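
Side note: with .prepare gone and .complete added, the per-request flow the
core drives can be condensed as below. This is a hedged sketch, not a
drop-in function: engine_run_one() is a hypothetical name, and the real
logic is split across mv_cesa_std_process() and mv_cesa_complete_req():

        static void engine_run_one(struct mv_cesa_engine *engine,
                                   struct crypto_async_request *req,
                                   u32 status)
        {
                struct mv_cesa_ctx *ctx = crypto_tfm_ctx(req->tfm);
                int res;

                res = ctx->ops->process(req, status);
                if (res == -EINPROGRESS) {
                        ctx->ops->step(req);    /* launch the next chunk */
                        return;
                }

                ctx->ops->complete(req);        /* pull result/IV from SRAM */
                ctx->ops->cleanup(req);         /* free chains, unmap SG */
                req->complete(req, res);
        }
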
*/ struct mv_cesa_req_ops { - void (*prepare)(struct crypto_async_request *req, - struct mv_cesa_engine *engine); int (*process)(struct crypto_async_request *req, u32 status); void (*step)(struct crypto_async_request *req); void (*cleanup)(struct crypto_async_request *req); + void (*complete)(struct crypto_async_request *req); }; /** @@ -507,21 +518,11 @@ enum mv_cesa_req_type { /** * struct mv_cesa_req - CESA request - * @type: request type * @engine: engine associated with this request + * @chain: list of tdma descriptors associated with this request */ struct mv_cesa_req { - enum mv_cesa_req_type type; struct mv_cesa_engine *engine; -}; - -/** - * struct mv_cesa_tdma_req - CESA TDMA request - * @base: base information - * @chain: TDMA chain - */ -struct mv_cesa_tdma_req { - struct mv_cesa_req base; struct mv_cesa_tdma_chain chain; }; @@ -538,13 +539,11 @@ struct mv_cesa_sg_std_iter { /** * struct mv_cesa_ablkcipher_std_req - cipher standard request - * @base: base information * @op: operation context * @offset: current operation offset * @size: size of the crypto operation */ struct mv_cesa_ablkcipher_std_req { - struct mv_cesa_req base; struct mv_cesa_op_ctx op; unsigned int offset; unsigned int size; @@ -558,34 +557,27 @@ struct mv_cesa_ablkcipher_std_req { * @dst_nents: number of entries in the dest sg list */ struct mv_cesa_ablkcipher_req { - union { - struct mv_cesa_req base; - struct mv_cesa_tdma_req dma; - struct mv_cesa_ablkcipher_std_req std; - } req; + struct mv_cesa_req base; + struct mv_cesa_ablkcipher_std_req std; int src_nents; int dst_nents; }; /** * struct mv_cesa_ahash_std_req - standard hash request - * @base: base information * @offset: current operation offset */ struct mv_cesa_ahash_std_req { - struct mv_cesa_req base; unsigned int offset; }; /** * struct mv_cesa_ahash_dma_req - DMA hash request - * @base: base information * @padding: padding buffer * @padding_dma: DMA address of the padding buffer * @cache_dma: DMA address of the cache buffer */ struct mv_cesa_ahash_dma_req { - struct mv_cesa_tdma_req base; u8 *padding; dma_addr_t padding_dma; u8 *cache; @@ -604,8 +596,8 @@ struct mv_cesa_ahash_dma_req { * @state: hash state */ struct mv_cesa_ahash_req { + struct mv_cesa_req base; union { - struct mv_cesa_req base; struct mv_cesa_ahash_dma_req dma; struct mv_cesa_ahash_std_req std; } req; @@ -623,6 +615,35 @@ struct mv_cesa_ahash_req { extern struct mv_cesa_dev *cesa_dev; + +static inline void +mv_cesa_engine_enqueue_complete_request(struct mv_cesa_engine *engine, + struct crypto_async_request *req) +{ + list_add_tail(&req->list, &engine->complete_queue); +} + +static inline struct crypto_async_request * +mv_cesa_engine_dequeue_complete_request(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req; + + req = list_first_entry_or_null(&engine->complete_queue, + struct crypto_async_request, + list); + if (req) + list_del(&req->list); + + return req; +} + + +static inline enum mv_cesa_req_type +mv_cesa_req_get_type(struct mv_cesa_req *req) +{ + return req->chain.first ? 
CESA_DMA_REQ : CESA_STD_REQ; +} + static inline void mv_cesa_update_op_cfg(struct mv_cesa_op_ctx *op, u32 cfg, u32 mask) { @@ -695,7 +716,32 @@ static inline bool mv_cesa_mac_op_is_first_frag(const struct mv_cesa_op_ctx *op) CESA_SA_DESC_CFG_FIRST_FRAG; } -int mv_cesa_queue_req(struct crypto_async_request *req); +int mv_cesa_queue_req(struct crypto_async_request *req, + struct mv_cesa_req *creq); + +struct crypto_async_request * +mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine, + struct crypto_async_request **backlog); + +static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight) +{ + int i; + u32 min_load = U32_MAX; + struct mv_cesa_engine *selected = NULL; + + for (i = 0; i < cesa_dev->caps->nengines; i++) { + struct mv_cesa_engine *engine = cesa_dev->engines + i; + u32 load = atomic_read(&engine->load); + if (load < min_load) { + min_load = load; + selected = engine; + } + } + + atomic_add(weight, &selected->load); + + return selected; +} /* * Helper function that indicates whether a crypto request needs to be @@ -765,9 +811,9 @@ static inline bool mv_cesa_req_dma_iter_next_op(struct mv_cesa_dma_iter *iter) return iter->op_len; } -void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq); +void mv_cesa_dma_step(struct mv_cesa_req *dreq); -static inline int mv_cesa_dma_process(struct mv_cesa_tdma_req *dreq, +static inline int mv_cesa_dma_process(struct mv_cesa_req *dreq, u32 status) { if (!(status & CESA_SA_INT_ACC0_IDMA_DONE)) @@ -779,10 +825,13 @@ static inline int mv_cesa_dma_process(struct mv_cesa_tdma_req *dreq, return 0; } -void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, +void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, struct mv_cesa_engine *engine); +void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq); +void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, + struct mv_cesa_req *dreq); +int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status); -void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq); static inline void mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) @@ -790,6 +839,9 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) memset(chain, 0, sizeof(*chain)); } +int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, + u32 size, u32 flags, gfp_t gfp_flags); + struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, const struct mv_cesa_op_ctx *op_templ, bool skip_ctx, diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index dcf1fceb9336..48df03a06066 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -70,25 +70,28 @@ mv_cesa_ablkcipher_dma_cleanup(struct ablkcipher_request *req) dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_BIDIRECTIONAL); } - mv_cesa_dma_cleanup(&creq->req.dma); + mv_cesa_dma_cleanup(&creq->base); } static inline void mv_cesa_ablkcipher_cleanup(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ablkcipher_dma_cleanup(req); } static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; size_t len = min_t(size_t, req->nbytes - 
sreq->offset, CESA_SA_SRAM_PAYLOAD_SIZE); + mv_cesa_adjust_op(engine, &sreq->op); + memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); + len = sg_pcopy_to_buffer(req->src, creq->src_nents, engine->sram + CESA_SA_DATA_SRAM_OFFSET, len, sreq->offset); @@ -106,6 +109,8 @@ static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -113,8 +118,8 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, u32 status) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; size_t len; len = sg_pcopy_from_buffer(req->dst, creq->dst_nents, @@ -133,21 +138,19 @@ static int mv_cesa_ablkcipher_process(struct crypto_async_request *req, { struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_req *basereq = &creq->base; + unsigned int ivsize; int ret; - if (creq->req.base.type == CESA_DMA_REQ) - ret = mv_cesa_dma_process(&creq->req.dma, status); - else - ret = mv_cesa_ablkcipher_std_process(ablkreq, status); + if (mv_cesa_req_get_type(basereq) == CESA_STD_REQ) + return mv_cesa_ablkcipher_std_process(ablkreq, status); + ret = mv_cesa_dma_process(basereq, status); if (ret) return ret; - memcpy_fromio(ablkreq->info, - engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, - crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq))); + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)); + memcpy_fromio(ablkreq->info, basereq->chain.last->data, ivsize); return 0; } @@ -157,8 +160,8 @@ static void mv_cesa_ablkcipher_step(struct crypto_async_request *req) struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - if (creq->req.base.type == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->req.dma); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->base); else mv_cesa_ablkcipher_std_step(ablkreq); } @@ -167,22 +170,19 @@ static inline void mv_cesa_ablkcipher_dma_prepare(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_tdma_req *dreq = &creq->req.dma; + struct mv_cesa_req *basereq = &creq->base; - mv_cesa_dma_prepare(dreq, dreq->base.engine); + mv_cesa_dma_prepare(basereq, basereq->engine); } static inline void mv_cesa_ablkcipher_std_prepare(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; sreq->size = 0; sreq->offset = 0; - mv_cesa_adjust_op(engine, &sreq->op); - memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); } static inline void mv_cesa_ablkcipher_prepare(struct crypto_async_request *req, @@ -190,9 +190,9 @@ static inline void mv_cesa_ablkcipher_prepare(struct 
crypto_async_request *req, { struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - creq->req.base.engine = engine; + creq->base.engine = engine; - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ablkcipher_dma_prepare(ablkreq); else mv_cesa_ablkcipher_std_prepare(ablkreq); @@ -206,11 +206,34 @@ mv_cesa_ablkcipher_req_cleanup(struct crypto_async_request *req) mv_cesa_ablkcipher_cleanup(ablkreq); } +static void +mv_cesa_ablkcipher_complete(struct crypto_async_request *req) +{ + struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); + struct mv_cesa_engine *engine = creq->base.engine; + unsigned int ivsize; + + atomic_sub(ablkreq->nbytes, &engine->load); + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) { + struct mv_cesa_req *basereq; + + basereq = &creq->base; + memcpy(ablkreq->info, basereq->chain.last->data, ivsize); + } else { + memcpy_fromio(ablkreq->info, + engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize); + } +} + static const struct mv_cesa_req_ops mv_cesa_ablkcipher_req_ops = { .step = mv_cesa_ablkcipher_step, .process = mv_cesa_ablkcipher_process, - .prepare = mv_cesa_ablkcipher_prepare, .cleanup = mv_cesa_ablkcipher_req_cleanup, + .complete = mv_cesa_ablkcipher_complete, }; static int mv_cesa_ablkcipher_cra_init(struct crypto_tfm *tfm) @@ -295,15 +318,15 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
GFP_KERNEL : GFP_ATOMIC; - struct mv_cesa_tdma_req *dreq = &creq->req.dma; + struct mv_cesa_req *basereq = &creq->base; struct mv_cesa_ablkcipher_dma_iter iter; struct mv_cesa_tdma_chain chain; bool skip_ctx = false; int ret; + unsigned int ivsize; - dreq->base.type = CESA_DMA_REQ; - dreq->chain.first = NULL; - dreq->chain.last = NULL; + basereq->chain.first = NULL; + basereq->chain.last = NULL; if (req->src != req->dst) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, @@ -358,12 +381,21 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, } while (mv_cesa_ablkcipher_req_iter_next_op(&iter)); - dreq->chain = chain; + /* Add output data for IV */ + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); + ret = mv_cesa_dma_add_iv_op(&chain, CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize, CESA_TDMA_SRC_IN_SRAM, flags); + + if (ret) + goto err_free_tdma; + + basereq->chain = chain; + basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ; return 0; err_free_tdma: - mv_cesa_dma_cleanup(dreq); + mv_cesa_dma_cleanup(basereq); if (req->dst != req->src) dma_unmap_sg(cesa_dev->dev, req->dst, creq->dst_nents, DMA_FROM_DEVICE); @@ -380,11 +412,13 @@ mv_cesa_ablkcipher_std_req_init(struct ablkcipher_request *req, const struct mv_cesa_op_ctx *op_templ) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_req *basereq = &creq->base; - sreq->base.type = CESA_STD_REQ; sreq->op = *op_templ; sreq->skip_ctx = false; + basereq->chain.first = NULL; + basereq->chain.last = NULL; return 0; } @@ -414,7 +448,6 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_OP_CRYPT_ONLY, CESA_SA_DESC_CFG_OP_MSK); - /* TODO: add a threshold for DMA usage */ if (cesa_dev->caps->has_tdma) ret = mv_cesa_ablkcipher_dma_req_init(req, tmpl); else @@ -423,28 +456,41 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, return ret; } -static int mv_cesa_des_op(struct ablkcipher_request *req, - struct mv_cesa_op_ctx *tmpl) +static int mv_cesa_ablkcipher_queue_req(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) { - struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int ret; - - mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES, - CESA_SA_DESC_CFG_CRYPTM_MSK); - - memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + struct mv_cesa_engine *engine; ret = mv_cesa_ablkcipher_req_init(req, tmpl); if (ret) return ret; - ret = mv_cesa_queue_req(&req->base); + engine = mv_cesa_select_engine(req->nbytes); + mv_cesa_ablkcipher_prepare(&req->base, engine); + + ret = mv_cesa_queue_req(&req->base, &creq->base); + if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ablkcipher_cleanup(req); return ret; } +static int mv_cesa_des_op(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES, + CESA_SA_DESC_CFG_CRYPTM_MSK); + + memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE); + + return mv_cesa_ablkcipher_queue_req(req, tmpl); +} + static int mv_cesa_ecb_des_encrypt(struct ablkcipher_request *req) { struct mv_cesa_op_ctx tmpl; @@ -547,22 +593,13 @@ static int mv_cesa_des3_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { struct 
mv_cesa_des3_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - int ret; mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_3DES, CESA_SA_DESC_CFG_CRYPTM_MSK); memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES3_EDE_KEY_SIZE); - ret = mv_cesa_ablkcipher_req_init(req, tmpl); - if (ret) - return ret; - - ret = mv_cesa_queue_req(&req->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ablkcipher_cleanup(req); - - return ret; + return mv_cesa_ablkcipher_queue_req(req, tmpl); } static int mv_cesa_ecb_des3_ede_encrypt(struct ablkcipher_request *req) @@ -673,7 +710,7 @@ static int mv_cesa_aes_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - int ret, i; + int i; u32 *key; u32 cfg; @@ -696,15 +733,7 @@ static int mv_cesa_aes_op(struct ablkcipher_request *req, CESA_SA_DESC_CFG_CRYPTM_MSK | CESA_SA_DESC_CFG_AES_LEN_MSK); - ret = mv_cesa_ablkcipher_req_init(req, tmpl); - if (ret) - return ret; - - ret = mv_cesa_queue_req(&req->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ablkcipher_cleanup(req); - - return ret; + return mv_cesa_ablkcipher_queue_req(req, tmpl); } static int mv_cesa_ecb_aes_encrypt(struct ablkcipher_request *req) diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 7a5058da9151..c35912b4fffb 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -103,14 +103,14 @@ static inline void mv_cesa_ahash_dma_cleanup(struct ahash_request *req) dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); mv_cesa_ahash_dma_free_cache(&creq->req.dma); - mv_cesa_dma_cleanup(&creq->req.dma.base); + mv_cesa_dma_cleanup(&creq->base); } static inline void mv_cesa_ahash_cleanup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_cleanup(req); } @@ -118,7 +118,7 @@ static void mv_cesa_ahash_last_cleanup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_last_cleanup(req); } @@ -157,11 +157,23 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_ahash_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_engine *engine = creq->base.engine; struct mv_cesa_op_ctx *op; unsigned int new_cache_ptr = 0; u32 frag_mode; size_t len; + unsigned int digsize; + int i; + + mv_cesa_adjust_op(engine, &creq->op_tmpl); + memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); + + digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); + for (i = 0; i < digsize / 4; i++) + writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); + + mv_cesa_adjust_op(engine, &creq->op_tmpl); + memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); if (creq->cache_ptr) memcpy_toio(engine->sram + CESA_SA_DATA_SRAM_OFFSET, @@ -237,6 +249,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -254,20 +268,17 @@ static int 
mv_cesa_ahash_std_process(struct ahash_request *req, u32 status) static inline void mv_cesa_ahash_dma_prepare(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - struct mv_cesa_tdma_req *dreq = &creq->req.dma.base; + struct mv_cesa_req *basereq = &creq->base; - mv_cesa_dma_prepare(dreq, dreq->base.engine); + mv_cesa_dma_prepare(basereq, basereq->engine); } static void mv_cesa_ahash_std_prepare(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_ahash_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; sreq->offset = 0; - mv_cesa_adjust_op(engine, &creq->op_tmpl); - memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); } static void mv_cesa_ahash_step(struct crypto_async_request *req) @@ -275,8 +286,8 @@ static void mv_cesa_ahash_step(struct crypto_async_request *req) struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - if (creq->req.base.type == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->req.dma.base); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->base); else mv_cesa_ahash_std_step(ahashreq); } @@ -285,17 +296,20 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - struct mv_cesa_engine *engine = creq->req.base.engine; - unsigned int digsize; - int ret, i; - if (creq->req.base.type == CESA_DMA_REQ) - ret = mv_cesa_dma_process(&creq->req.dma.base, status); - else - ret = mv_cesa_ahash_std_process(ahashreq, status); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + return mv_cesa_dma_process(&creq->base, status); - if (ret == -EINPROGRESS) - return ret; + return mv_cesa_ahash_std_process(ahashreq, status); +} + +static void mv_cesa_ahash_complete(struct crypto_async_request *req) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + struct mv_cesa_engine *engine = creq->base.engine; + unsigned int digsize; + int i; digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); for (i = 0; i < digsize / 4; i++) @@ -325,7 +339,7 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) } } - return ret; + atomic_sub(ahashreq->nbytes, &engine->load); } static void mv_cesa_ahash_prepare(struct crypto_async_request *req, @@ -333,19 +347,13 @@ static void mv_cesa_ahash_prepare(struct crypto_async_request *req, { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - unsigned int digsize; - int i; - creq->req.base.engine = engine; + creq->base.engine = engine; - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_prepare(ahashreq); else mv_cesa_ahash_std_prepare(ahashreq); - - digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); - for (i = 0; i < digsize / 4; i++) - writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); } static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req) @@ -362,8 +370,8 @@ static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req) static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = { .step = mv_cesa_ahash_step, .process = mv_cesa_ahash_process, - .prepare = mv_cesa_ahash_prepare, .cleanup = 
mv_cesa_ahash_req_cleanup, + .complete = mv_cesa_ahash_complete, }; static int mv_cesa_ahash_init(struct ahash_request *req, @@ -553,15 +561,14 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; - struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma; - struct mv_cesa_tdma_req *dreq = &ahashdreq->base; + struct mv_cesa_req *basereq = &creq->base; struct mv_cesa_ahash_dma_iter iter; struct mv_cesa_op_ctx *op = NULL; unsigned int frag_len; int ret; - dreq->chain.first = NULL; - dreq->chain.last = NULL; + basereq->chain.first = NULL; + basereq->chain.last = NULL; if (creq->src_nents) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, @@ -572,14 +579,14 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) } } - mv_cesa_tdma_desc_iter_init(&dreq->chain); + mv_cesa_tdma_desc_iter_init(&basereq->chain); mv_cesa_ahash_req_iter_init(&iter, req); /* * Add the cache (left-over data from a previous block) first. * This will never overflow the SRAM size. */ - ret = mv_cesa_ahash_dma_add_cache(&dreq->chain, &iter, creq, flags); + ret = mv_cesa_ahash_dma_add_cache(&basereq->chain, &iter, creq, flags); if (ret) goto err_free_tdma; @@ -590,7 +597,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) * data. We intentionally do not add the final op block. */ while (true) { - ret = mv_cesa_dma_add_op_transfers(&dreq->chain, + ret = mv_cesa_dma_add_op_transfers(&basereq->chain, &iter.base, &iter.src, flags); if (ret) @@ -601,7 +608,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (!mv_cesa_ahash_req_iter_next_op(&iter)) break; - op = mv_cesa_dma_add_frag(&dreq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { ret = PTR_ERR(op); @@ -619,10 +626,10 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) * operation, which depends whether this is the final request. 
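
Side note: the reworked mv_cesa_ahash_dma_req_init() below assembles its
chain in four steps; the outline is a reading aid only, not compilable code:

        /* 1. Flush the software cache (left-over bytes from the previous
         *    update) into the chain first -- it always fits in SRAM.
         * 2. Loop: add SRAM-sized data transfers, each followed by a
         *    partial-hash op descriptor (mv_cesa_dma_add_frag()).
         * 3. Add the closing op -- mv_cesa_ahash_dma_last_req() when
         *    creq->last_req is set, otherwise one more partial frag --
         *    plus a dummy descriptor to wait for the operation end.
         * 4. Terminate the chain so the IRQ path can split requests:
         *    basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ |
         *                                  CESA_TDMA_BREAK_CHAIN;
         */
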
*/ if (creq->last_req) - op = mv_cesa_ahash_dma_last_req(&dreq->chain, &iter, creq, + op = mv_cesa_ahash_dma_last_req(&basereq->chain, &iter, creq, frag_len, flags); else if (frag_len) - op = mv_cesa_dma_add_frag(&dreq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { @@ -632,7 +639,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (op) { /* Add dummy desc to wait for crypto operation end */ - ret = mv_cesa_dma_add_dummy_end(&dreq->chain, flags); + ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags); if (ret) goto err_free_tdma; } @@ -643,10 +650,13 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) else creq->cache_ptr = 0; + basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ | + CESA_TDMA_BREAK_CHAIN); + return 0; err_free_tdma: - mv_cesa_dma_cleanup(dreq); + mv_cesa_dma_cleanup(basereq); dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); err: @@ -660,11 +670,6 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); int ret; - if (cesa_dev->caps->has_tdma) - creq->req.base.type = CESA_DMA_REQ; - else - creq->req.base.type = CESA_STD_REQ; - creq->src_nents = sg_nents_for_len(req->src, req->nbytes); if (creq->src_nents < 0) { dev_err(cesa_dev->dev, "Invalid number of src SG"); @@ -678,19 +683,19 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) if (*cached) return 0; - if (creq->req.base.type == CESA_DMA_REQ) + if (cesa_dev->caps->has_tdma) ret = mv_cesa_ahash_dma_req_init(req); return ret; } -static int mv_cesa_ahash_update(struct ahash_request *req) +static int mv_cesa_ahash_queue_req(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_engine *engine; bool cached = false; int ret; - creq->len += req->nbytes; ret = mv_cesa_ahash_req_init(req, &cached); if (ret) return ret; @@ -698,61 +703,48 @@ static int mv_cesa_ahash_update(struct ahash_request *req) if (cached) return 0; - ret = mv_cesa_queue_req(&req->base); + engine = mv_cesa_select_engine(req->nbytes); + mv_cesa_ahash_prepare(&req->base, engine); + + ret = mv_cesa_queue_req(&req->base, &creq->base); + if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ahash_cleanup(req); return ret; } +static int mv_cesa_ahash_update(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + creq->len += req->nbytes; + + return mv_cesa_ahash_queue_req(req); +} + static int mv_cesa_ahash_final(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; - bool cached = false; - int ret; mv_cesa_set_mac_op_total_len(tmpl, creq->len); creq->last_req = true; req->nbytes = 0; - ret = mv_cesa_ahash_req_init(req, &cached); - if (ret) - return ret; - - if (cached) - return 0; - - ret = mv_cesa_queue_req(&req->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ahash_cleanup(req); - - return ret; + return mv_cesa_ahash_queue_req(req); } static int mv_cesa_ahash_finup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; - bool cached = false; - int ret; creq->len += req->nbytes; mv_cesa_set_mac_op_total_len(tmpl, creq->len); creq->last_req = true; - ret = mv_cesa_ahash_req_init(req, &cached); - if (ret) - return ret; - - if (cached) - return 0; - - ret = 
mv_cesa_queue_req(&req->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ahash_cleanup(req); - - return ret; + return mv_cesa_ahash_queue_req(req); } static int mv_cesa_ahash_export(struct ahash_request *req, void *hash, diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 0ad8f1ecf175..86a065bcc187 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -37,9 +37,9 @@ bool mv_cesa_req_dma_iter_next_transfer(struct mv_cesa_dma_iter *iter, return true; } -void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) +void mv_cesa_dma_step(struct mv_cesa_req *dreq) { - struct mv_cesa_engine *engine = dreq->base.engine; + struct mv_cesa_engine *engine = dreq->engine; writel_relaxed(0, engine->regs + CESA_SA_CFG); @@ -53,19 +53,25 @@ void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) engine->regs + CESA_SA_CFG); writel_relaxed(dreq->chain.first->cur_dma, engine->regs + CESA_TDMA_NEXT_ADDR); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } -void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) +void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq) { struct mv_cesa_tdma_desc *tdma; for (tdma = dreq->chain.first; tdma;) { struct mv_cesa_tdma_desc *old_tdma = tdma; + u32 type = tdma->flags & CESA_TDMA_TYPE_MSK; - if (tdma->flags & CESA_TDMA_OP) + if (type == CESA_TDMA_OP) dma_pool_free(cesa_dev->dma->op_pool, tdma->op, le32_to_cpu(tdma->src)); + else if (type == CESA_TDMA_IV) + dma_pool_free(cesa_dev->dma->iv_pool, tdma->data, + le32_to_cpu(tdma->dst)); tdma = tdma->next; dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma, @@ -76,7 +82,7 @@ void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) dreq->chain.last = NULL; } -void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, +void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, struct mv_cesa_engine *engine) { struct mv_cesa_tdma_desc *tdma; @@ -88,11 +94,97 @@ void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, if (tdma->flags & CESA_TDMA_SRC_IN_SRAM) tdma->src = cpu_to_le32(tdma->src + engine->sram_dma); - if (tdma->flags & CESA_TDMA_OP) + if ((tdma->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_OP) mv_cesa_adjust_op(engine, tdma->op); } } +void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, + struct mv_cesa_req *dreq) +{ + if (engine->chain.first == NULL && engine->chain.last == NULL) { + engine->chain.first = dreq->chain.first; + engine->chain.last = dreq->chain.last; + } else { + struct mv_cesa_tdma_desc *last; + + last = engine->chain.last; + last->next = dreq->chain.first; + engine->chain.last = dreq->chain.last; + + if (!(last->flags & CESA_TDMA_BREAK_CHAIN)) + last->next_dma = dreq->chain.first->cur_dma; + } +} + +int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status) +{ + struct crypto_async_request *req = NULL; + struct mv_cesa_tdma_desc *tdma = NULL, *next = NULL; + dma_addr_t tdma_cur; + int res = 0; + + tdma_cur = readl(engine->regs + CESA_TDMA_CUR); + + for (tdma = engine->chain.first; tdma; tdma = next) { + spin_lock_bh(&engine->lock); + next = tdma->next; + spin_unlock_bh(&engine->lock); + + if (tdma->flags & CESA_TDMA_END_OF_REQ) { + struct crypto_async_request *backlog = NULL; + struct mv_cesa_ctx *ctx; + u32 current_status; + + spin_lock_bh(&engine->lock); + /* + * if req is NULL, this means we're processing the + * request in engine->req. 
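
Side note: stripped of locking and backlog handling, the walk that
mv_cesa_tdma_process() performs over the engine-global chain reduces to the
following (condensed sketch, not a replacement for the hunk below):

        struct mv_cesa_tdma_desc *tdma;
        dma_addr_t cur;

        cur = readl(engine->regs + CESA_TDMA_CUR);
        for (tdma = engine->chain.first; tdma; tdma = tdma->next) {
                if (tdma->flags & CESA_TDMA_END_OF_REQ) {
                        /* A request boundary: detach its descriptors and
                         * run ->process()/->complete() for that request. */
                }
                if (tdma->cur_dma == cur)
                        break;  /* engine has not progressed past here */
        }
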
+ */ + if (!req) + req = engine->req; + else + req = mv_cesa_dequeue_req_locked(engine, + &backlog); + + /* Re-chaining to the next request */ + engine->chain.first = tdma->next; + tdma->next = NULL; + + /* If this is the last request, clear the chain */ + if (engine->chain.first == NULL) + engine->chain.last = NULL; + spin_unlock_bh(&engine->lock); + + ctx = crypto_tfm_ctx(req->tfm); + current_status = (tdma->cur_dma == tdma_cur) ? + status : CESA_SA_INT_ACC0_IDMA_DONE; + res = ctx->ops->process(req, current_status); + ctx->ops->complete(req); + + if (res == 0) + mv_cesa_engine_enqueue_complete_request(engine, + req); + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + } + + if (res || tdma->cur_dma == tdma_cur) + break; + } + + /* Save the last request in error to engine->req, so that the core + * knows which request was fautly */ + if (res) { + spin_lock_bh(&engine->lock); + engine->req = req; + spin_unlock_bh(&engine->lock); + } + + return res; +} + static struct mv_cesa_tdma_desc * mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) { @@ -117,6 +209,32 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) return new_tdma; } +int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, + u32 size, u32 flags, gfp_t gfp_flags) +{ + + struct mv_cesa_tdma_desc *tdma; + u8 *iv; + dma_addr_t dma_handle; + + tdma = mv_cesa_dma_add_desc(chain, gfp_flags); + if (IS_ERR(tdma)) + return PTR_ERR(tdma); + + iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle); + if (!iv) + return -ENOMEM; + + tdma->byte_cnt = cpu_to_le32(size | BIT(31)); + tdma->src = src; + tdma->dst = cpu_to_le32(dma_handle); + tdma->data = iv; + + flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM); + tdma->flags = flags | CESA_TDMA_IV; + return 0; +} + struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, const struct mv_cesa_op_ctx *op_templ, bool skip_ctx, diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 59ed54e464a9..625ee50fd78b 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -11,7 +11,6 @@ * http://www.gnu.org/copyleft/gpl.html */ -#include #include #include #include @@ -25,6 +24,7 @@ #include #include #include +#include #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE @@ -84,7 +84,7 @@ struct dcp_async_ctx { unsigned int hot:1; /* Crypto-specific context */ - struct crypto_ablkcipher *fallback; + struct crypto_skcipher *fallback; unsigned int key_len; uint8_t key[AES_KEYSIZE_128]; }; @@ -374,20 +374,22 @@ static int dcp_chan_thread_aes(void *data) static int mxs_dcp_block_fallback(struct ablkcipher_request *req, int enc) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); - struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx( - crypto_ablkcipher_reqtfm(req)); + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); int ret; - ablkcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); if (enc) - ret = crypto_ablkcipher_encrypt(req); + ret = crypto_skcipher_encrypt(subreq); else - ret = crypto_ablkcipher_decrypt(req); + ret = crypto_skcipher_decrypt(subreq); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + 
skcipher_request_zero(subreq); return ret; } @@ -453,28 +455,22 @@ static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, return 0; } - /* Check if the key size is supported by kernel at all. */ - if (len != AES_KEYSIZE_192 && len != AES_KEYSIZE_256) { - tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - /* * If the requested AES key size is not supported by the hardware, * but is supported by in-kernel software implementation, we use * software fallback. */ - actx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - actx->fallback->base.crt_flags |= - tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK; + crypto_skcipher_clear_flags(actx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(actx->fallback, + tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); - ret = crypto_ablkcipher_setkey(actx->fallback, key, len); + ret = crypto_skcipher_setkey(actx->fallback, key, len); if (!ret) return 0; tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->base.crt_flags |= - actx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK; + tfm->base.crt_flags |= crypto_skcipher_get_flags(actx->fallback) & + CRYPTO_TFM_RES_MASK; return ret; } @@ -484,9 +480,9 @@ static int mxs_dcp_aes_fallback_init(struct crypto_tfm *tfm) const char *name = crypto_tfm_alg_name(tfm); const uint32_t flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK; struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); - struct crypto_ablkcipher *blk; + struct crypto_skcipher *blk; - blk = crypto_alloc_ablkcipher(name, 0, flags); + blk = crypto_alloc_skcipher(name, 0, flags); if (IS_ERR(blk)) return PTR_ERR(blk); @@ -499,8 +495,7 @@ static void mxs_dcp_aes_fallback_exit(struct crypto_tfm *tfm) { struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); - crypto_free_ablkcipher(actx->fallback); - actx->fallback = NULL; + crypto_free_skcipher(actx->fallback); } /* diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 0794f1cc0018..42f0f229f7f7 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -392,7 +392,7 @@ static void nx_of_update_msc(struct device *dev, ((bytes_so_far + sizeof(struct msc_triplet)) <= lenp) && i < msc->triplets; i++) { - if (msc->fc > NX_MAX_FC || msc->mode > NX_MAX_MODE) { + if (msc->fc >= NX_MAX_FC || msc->mode >= NX_MAX_MODE) { dev_err(dev, "unknown function code/mode " "combo: %d/%d (ignored)\n", msc->fc, msc->mode); diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index ce174d3b842c..4ab53a604312 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -528,8 +528,6 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) omap_aes_dma_stop(dd); - dmaengine_terminate_all(dd->dma_lch_in); - dmaengine_terminate_all(dd->dma_lch_out); return 0; } @@ -580,10 +578,12 @@ static int omap_aes_copy_sgs(struct omap_aes_dev *dd) sg_init_table(&dd->in_sgl, 1); sg_set_buf(&dd->in_sgl, buf_in, total); dd->in_sg = &dd->in_sgl; + dd->in_sg_len = 1; sg_init_table(&dd->out_sgl, 1); sg_set_buf(&dd->out_sgl, buf_out, total); dd->out_sg = &dd->out_sgl; + dd->out_sg_len = 1; return 0; } @@ -604,7 +604,6 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, crypto_ablkcipher_reqtfm(req)); struct omap_aes_dev *dd = omap_aes_find_dev(ctx); struct omap_aes_reqctx *rctx; - int len; if (!dd) return -ENODEV; @@ -616,6 +615,14 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, dd->in_sg = req->src; dd->out_sg = req->dst; + dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total); + if (dd->in_sg_len < 0) + return dd->in_sg_len; + + 
dd->out_sg_len = sg_nents_for_len(dd->out_sg, dd->total); + if (dd->out_sg_len < 0) + return dd->out_sg_len; + if (omap_aes_check_aligned(dd->in_sg, dd->total) || omap_aes_check_aligned(dd->out_sg, dd->total)) { if (omap_aes_copy_sgs(dd)) @@ -625,11 +632,6 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, dd->sgs_copied = 0; } - len = ALIGN(dd->total, AES_BLOCK_SIZE); - dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, len); - dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, len); - BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0); - rctx = ablkcipher_request_ctx(req); ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); rctx->mode &= FLAGS_MODE_MASK; @@ -1185,17 +1187,19 @@ static int omap_aes_probe(struct platform_device *pdev) spin_unlock(&list_lock); for (i = 0; i < dd->pdata->algs_info_size; i++) { - for (j = 0; j < dd->pdata->algs_info[i].size; j++) { - algp = &dd->pdata->algs_info[i].algs_list[j]; + if (!dd->pdata->algs_info[i].registered) { + for (j = 0; j < dd->pdata->algs_info[i].size; j++) { + algp = &dd->pdata->algs_info[i].algs_list[j]; - pr_debug("reg alg: %s\n", algp->cra_name); - INIT_LIST_HEAD(&algp->cra_list); + pr_debug("reg alg: %s\n", algp->cra_name); + INIT_LIST_HEAD(&algp->cra_list); - err = crypto_register_alg(algp); - if (err) - goto err_algs; + err = crypto_register_alg(algp); + if (err) + goto err_algs; - dd->pdata->algs_info[i].registered++; + dd->pdata->algs_info[i].registered++; + } } } diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 3eedb03111ba..5691434ffb2d 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -560,10 +560,12 @@ static int omap_des_copy_sgs(struct omap_des_dev *dd) sg_init_table(&dd->in_sgl, 1); sg_set_buf(&dd->in_sgl, buf_in, dd->total); dd->in_sg = &dd->in_sgl; + dd->in_sg_len = 1; sg_init_table(&dd->out_sgl, 1); sg_set_buf(&dd->out_sgl, buf_out, dd->total); dd->out_sg = &dd->out_sgl; + dd->out_sg_len = 1; return 0; } @@ -595,6 +597,14 @@ static int omap_des_prepare_req(struct crypto_engine *engine, dd->in_sg = req->src; dd->out_sg = req->dst; + dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total); + if (dd->in_sg_len < 0) + return dd->in_sg_len; + + dd->out_sg_len = sg_nents_for_len(dd->out_sg, dd->total); + if (dd->out_sg_len < 0) + return dd->out_sg_len; + if (omap_des_copy_needed(dd->in_sg) || omap_des_copy_needed(dd->out_sg)) { if (omap_des_copy_sgs(dd)) @@ -604,10 +614,6 @@ static int omap_des_prepare_req(struct crypto_engine *engine, dd->sgs_copied = 0; } - dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, dd->total); - dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, dd->total); - BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0); - rctx = ablkcipher_request_ctx(req); ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); rctx->mode &= FLAGS_MODE_MASK; diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 63464e86f2b1..7fe4eef12fe2 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -100,6 +100,8 @@ #define DEFAULT_TIMEOUT_INTERVAL HZ +#define DEFAULT_AUTOSUSPEND_DELAY 1000 + /* mostly device flags */ #define FLAGS_BUSY 0 #define FLAGS_FINAL 1 @@ -173,7 +175,7 @@ struct omap_sham_ctx { struct omap_sham_hmac_ctx base[0]; }; -#define OMAP_SHAM_QUEUE_LENGTH 1 +#define OMAP_SHAM_QUEUE_LENGTH 10 struct omap_sham_algs_info { struct ahash_alg *algs_list; @@ -813,7 +815,6 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - 
dmaengine_terminate_all(dd->dma_lch); if (ctx->flags & BIT(FLAGS_SG)) { dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); @@ -999,7 +1000,8 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) dd->flags &= ~(BIT(FLAGS_BUSY) | BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) | BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY)); - pm_runtime_put(dd->dev); + pm_runtime_mark_last_busy(dd->dev); + pm_runtime_put_autosuspend(dd->dev); if (req->base.complete) req->base.complete(&req->base, err); @@ -1093,7 +1095,7 @@ static int omap_sham_update(struct ahash_request *req) ctx->offset = 0; if (ctx->flags & BIT(FLAGS_FINUP)) { - if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 9) { + if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 240) { /* * OMAP HW accel works only with buffers >= 9 * will switch to bypass in final() @@ -1149,9 +1151,13 @@ static int omap_sham_final(struct ahash_request *req) if (ctx->flags & BIT(FLAGS_ERROR)) return 0; /* uncompleted hash is not needed */ - /* OMAP HW accel works only with buffers >= 9 */ - /* HMAC is always >= 9 because ipad == block size */ - if ((ctx->digcnt + ctx->bufcnt) < 9) + /* + * OMAP HW accel works only with buffers >= 9. + * HMAC is always >= 9 because ipad == block size. + * If buffersize is less than 240, we use fallback SW encoding, + * as using DMA + HW in this case doesn't provide any benefit. + */ + if ((ctx->digcnt + ctx->bufcnt) < 240) return omap_sham_final_shash(req); else if (ctx->bufcnt) return omap_sham_enqueue(req, OP_FINAL); @@ -1328,7 +1334,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "sha1", .cra_driver_name = "omap-sha1", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1351,7 +1357,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "md5", .cra_driver_name = "omap-md5", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1375,7 +1381,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "hmac(sha1)", .cra_driver_name = "omap-hmac-sha1", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1400,7 +1406,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "hmac(md5)", .cra_driver_name = "omap-hmac-md5", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1428,7 +1434,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "sha224", .cra_driver_name = "omap-sha224", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1450,7 +1456,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "sha256", .cra_driver_name = "omap-sha256", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1473,7 +1479,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "hmac(sha224)", .cra_driver_name = "omap-hmac-sha224", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1497,7 +1503,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "hmac(sha256)", .cra_driver_name = "omap-hmac-sha256", - 
.cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1523,7 +1529,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "sha384", .cra_driver_name = "omap-sha384", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1545,7 +1551,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "sha512", .cra_driver_name = "omap-sha512", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1568,7 +1574,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "hmac(sha384)", .cra_driver_name = "omap-hmac-sha384", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1592,7 +1598,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "hmac(sha512)", .cra_driver_name = "omap-hmac-sha512", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1946,6 +1952,9 @@ static int omap_sham_probe(struct platform_device *pdev) dd->flags |= dd->pdata->flags; + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY); + pm_runtime_enable(dev); pm_runtime_irq_safe(dev); diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 3b1c7ecf078f..47576098831f 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -171,7 +171,7 @@ struct spacc_ablk_ctx { * The fallback cipher. If the operation can't be done in hardware, * fallback to a software version. */ - struct crypto_ablkcipher *sw_cipher; + struct crypto_skcipher *sw_cipher; }; /* AEAD cipher context. */ @@ -789,33 +789,35 @@ static int spacc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, * request for any other size (192 bits) then we need to do a software * fallback. */ - if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256 && - ctx->sw_cipher) { + if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256) { + if (!ctx->sw_cipher) + return -EINVAL; + /* * Set the fallback transform to use the same request flags as * the hardware transform. 
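The omap-sham runtime-PM change (pm_runtime_put() replaced by mark_last_busy() plus put_autosuspend(), with autosuspend armed in the probe hunk above) follows the standard autosuspend idiom. A minimal sketch under invented names; my_probe, my_request_done and MY_AUTOSUSPEND_DELAY are illustrative, not from the driver:

#include <linux/pm_runtime.h>

#define MY_AUTOSUSPEND_DELAY    2000    /* ms; placeholder value */

static int my_probe(struct device *dev)
{
        /* Opt in to autosuspend before enabling runtime PM. */
        pm_runtime_use_autosuspend(dev);
        pm_runtime_set_autosuspend_delay(dev, MY_AUTOSUSPEND_DELAY);
        pm_runtime_enable(dev);
        return 0;
}

static void my_request_done(struct device *dev)
{
        /*
         * Record activity and drop the reference without suspending
         * immediately; the core powers the device down only once the
         * delay elapses with no further activity, so bursts of
         * back-to-back requests skip the suspend/resume churn.
         */
        pm_runtime_mark_last_busy(dev);
        pm_runtime_put_autosuspend(dev);
}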
*/ - ctx->sw_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - ctx->sw_cipher->base.crt_flags |= - cipher->base.crt_flags & CRYPTO_TFM_REQ_MASK; + crypto_skcipher_clear_flags(ctx->sw_cipher, + CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->sw_cipher, + cipher->base.crt_flags & + CRYPTO_TFM_REQ_MASK); + + err = crypto_skcipher_setkey(ctx->sw_cipher, key, len); + + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= + crypto_skcipher_get_flags(ctx->sw_cipher) & + CRYPTO_TFM_RES_MASK; - err = crypto_ablkcipher_setkey(ctx->sw_cipher, key, len); if (err) goto sw_setkey_failed; - } else if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256 && - !ctx->sw_cipher) - err = -EINVAL; + } memcpy(ctx->key, key, len); ctx->key_len = len; sw_setkey_failed: - if (err && ctx->sw_cipher) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= - ctx->sw_cipher->base.crt_flags & CRYPTO_TFM_RES_MASK; - } - return err; } @@ -910,20 +912,21 @@ static int spacc_ablk_do_fallback(struct ablkcipher_request *req, struct crypto_tfm *old_tfm = crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(old_tfm); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->sw_cipher); int err; - if (!ctx->sw_cipher) - return -EINVAL; - /* * Change the request to use the software fallback transform, and once * the ciphering has completed, put the old transform back into the * request. */ - ablkcipher_request_set_tfm(req, ctx->sw_cipher); - err = is_encrypt ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(old_tfm)); + skcipher_request_set_tfm(subreq, ctx->sw_cipher); + skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = is_encrypt ? 
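The spacc_ablk_do_fallback() rewrite above is one instance of the on-stack skcipher fallback pattern this pull applies across drivers (qce and sahara below get the same treatment). A self-contained sketch, assuming a fallback tfm was allocated with crypto_alloc_skcipher(..., CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); my_do_fallback is an invented name:

#include <linux/crypto.h>
#include <crypto/skcipher.h>

static int my_do_fallback(struct crypto_skcipher *fallback,
                          struct ablkcipher_request *req, bool encrypt)
{
        SKCIPHER_REQUEST_ON_STACK(subreq, fallback);
        int err;

        skcipher_request_set_tfm(subreq, fallback);
        skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL);
        skcipher_request_set_crypt(subreq, req->src, req->dst,
                                   req->nbytes, req->info);
        err = encrypt ? crypto_skcipher_encrypt(subreq) :
                        crypto_skcipher_decrypt(subreq);
        /* Wipe the on-stack request; it can hold key-derived state. */
        skcipher_request_zero(subreq);
        return err;
}

Compared with the old approach of temporarily swapping the request's tfm, the private on-stack subrequest never mutates the caller's request.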
crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -1015,12 +1018,13 @@ static int spacc_ablk_cra_init(struct crypto_tfm *tfm) ctx->generic.flags = spacc_alg->type; ctx->generic.engine = engine; if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { - ctx->sw_cipher = crypto_alloc_ablkcipher(alg->cra_name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + ctx->sw_cipher = crypto_alloc_skcipher( + alg->cra_name, 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->sw_cipher)) { dev_warn(engine->dev, "failed to allocate fallback for %s\n", alg->cra_name); - ctx->sw_cipher = NULL; + return PTR_ERR(ctx->sw_cipher); } } ctx->generic.key_offs = spacc_alg->key_offs; @@ -1035,9 +1039,7 @@ static void spacc_ablk_cra_exit(struct crypto_tfm *tfm) { struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->sw_cipher) - crypto_free_ablkcipher(ctx->sw_cipher); - ctx->sw_cipher = NULL; + crypto_free_skcipher(ctx->sw_cipher); } static int spacc_ablk_encrypt(struct ablkcipher_request *req) diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index 85b44e577684..ce3cae40f949 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -4,12 +4,13 @@ config CRYPTO_DEV_QAT select CRYPTO_AUTHENC select CRYPTO_BLKCIPHER select CRYPTO_AKCIPHER + select CRYPTO_DH select CRYPTO_HMAC + select CRYPTO_RSA select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 select FW_LOADER - select ASN1 config CRYPTO_DEV_QAT_DH895xCC tristate "Support for Intel(R) DH895xCC" diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index c5bd5a9abc4d..6bc68bc00d76 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -229,6 +229,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_flr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index 879e04cae714..618cec360b39 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -239,6 +239,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_flr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index 6d74b91f2152..92fb6ffdc062 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -1,11 +1,3 @@ -$(obj)/qat_rsapubkey-asn1.o: $(obj)/qat_rsapubkey-asn1.c \ - $(obj)/qat_rsapubkey-asn1.h -$(obj)/qat_rsaprivkey-asn1.o: $(obj)/qat_rsaprivkey-asn1.c \ - $(obj)/qat_rsaprivkey-asn1.h - -clean-files += qat_rsapubkey-asn1.c qat_rsapubkey-asn1.h -clean-files += qat_rsaprivkey-asn1.c qat_rsaprivkey-asn1.h - obj-$(CONFIG_CRYPTO_DEV_QAT) += intel_qat.o intel_qat-objs := adf_cfg.o \ adf_isr.o \ @@ -19,8 +11,6 @@ intel_qat-objs := adf_cfg.o \ adf_hw_arbiter.o \ qat_crypto.o \ qat_algs.o \ - qat_rsapubkey-asn1.o \ - qat_rsaprivkey-asn1.o \ 
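A guess at the rationale for the new reset_device hooks being installed here; the split itself is visible in the diffs, the reasoning is my assumption:

/*
 * c3xxx and c62x point ->reset_device() at adf_reset_flr(), a
 * per-function reset via PCI_EXP_DEVCTL_BCR_FLR, while the older
 * dh895xcc below keeps adf_reset_sbr(), a secondary bus reset through
 * the parent bridge.  FLR touches only the one function, so on parts
 * that support it a device reset presumably no longer disturbs other
 * functions behind the same bridge.
 */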
qat_asym_algs.o \ qat_uclo.o \ qat_hal.o diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 5a07208ce778..e8822536530b 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -176,6 +176,7 @@ struct adf_hw_device_data { void (*disable_iov)(struct adf_accel_dev *accel_dev); void (*enable_ints)(struct adf_accel_dev *accel_dev); int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev); + void (*reset_device)(struct adf_accel_dev *accel_dev); const char *fw_name; const char *fw_mmp_name; uint32_t fuses; diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index b40d9c8dad96..2839fccdd84b 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -82,18 +82,12 @@ struct adf_reset_dev_data { struct work_struct reset_work; }; -void adf_dev_restore(struct adf_accel_dev *accel_dev) +void adf_reset_sbr(struct adf_accel_dev *accel_dev) { struct pci_dev *pdev = accel_to_pci_dev(accel_dev); struct pci_dev *parent = pdev->bus->self; uint16_t bridge_ctl = 0; - if (accel_dev->is_vf) - return; - - dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", - accel_dev->accel_id); - if (!parent) parent = pdev; @@ -101,6 +95,8 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev) dev_info(&GET_DEV(accel_dev), "Transaction still in progress. Proceeding\n"); + dev_info(&GET_DEV(accel_dev), "Secondary bus reset\n"); + pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl); bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl); @@ -108,8 +104,40 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev) bridge_ctl &= ~PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl); msleep(100); - pci_restore_state(pdev); - pci_save_state(pdev); +} +EXPORT_SYMBOL_GPL(adf_reset_sbr); + +void adf_reset_flr(struct adf_accel_dev *accel_dev) +{ + struct pci_dev *pdev = accel_to_pci_dev(accel_dev); + u16 control = 0; + int pos = 0; + + dev_info(&GET_DEV(accel_dev), "Function level reset\n"); + pos = pci_pcie_cap(pdev); + if (!pos) { + dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); + return; + } + pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &control); + control |= PCI_EXP_DEVCTL_BCR_FLR; + pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, control); + msleep(100); +} +EXPORT_SYMBOL_GPL(adf_reset_flr); + +void adf_dev_restore(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + struct pci_dev *pdev = accel_to_pci_dev(accel_dev); + + if (hw_device->reset_device) { + dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", + accel_dev->accel_id); + hw_device->reset_device(accel_dev); + pci_restore_state(pdev); + pci_save_state(pdev); + } } static void adf_device_reset_worker(struct work_struct *work) @@ -243,7 +271,8 @@ EXPORT_SYMBOL_GPL(adf_disable_aer); int adf_init_aer(void) { - device_reset_wq = create_workqueue("qat_device_reset_wq"); + device_reset_wq = alloc_workqueue("qat_device_reset_wq", + WQ_MEM_RECLAIM, 0); return !device_reset_wq ? 
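The create_workqueue() to alloc_workqueue() conversions here and just below keep WQ_MEM_RECLAIM while dropping the legacy wrapper. Sketch; my_wq and my_init are placeholders:

#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;

static int __init my_init(void)
{
        /*
         * WQ_MEM_RECLAIM guarantees a rescuer thread, so work items
         * that take part in device recovery keep making progress even
         * under memory pressure; max_active of 0 selects the default
         * concurrency level.
         */
        my_wq = alloc_workqueue("my_wq", WQ_MEM_RECLAIM, 0);
        return my_wq ? 0 : -ENOMEM;
}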
-EFAULT : 0; } diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 75faa39bc8d0..980e07475012 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -141,6 +141,8 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev); int adf_enable_aer(struct adf_accel_dev *accel_dev, struct pci_driver *adf); void adf_disable_aer(struct adf_accel_dev *accel_dev); +void adf_reset_sbr(struct adf_accel_dev *accel_dev); +void adf_reset_flr(struct adf_accel_dev *accel_dev); void adf_dev_restore(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 4a526e2f1d7f..9320ae1d005b 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -292,7 +292,7 @@ EXPORT_SYMBOL_GPL(adf_sriov_configure); int __init adf_init_pf_wq(void) { /* Workqueue for PF2VF responses */ - pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); + pf2vf_resp_wq = alloc_workqueue("qat_pf2vf_resp_wq", WQ_MEM_RECLAIM, 0); return !pf2vf_resp_wq ? -ENOMEM : 0; } diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index aa689cabedb4..bf99e11a3403 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -321,7 +321,7 @@ EXPORT_SYMBOL_GPL(adf_vf_isr_resource_alloc); int __init adf_init_vf_wq(void) { - adf_vf_stop_wq = create_workqueue("adf_vf_stop_wq"); + adf_vf_stop_wq = alloc_workqueue("adf_vf_stop_wq", WQ_MEM_RECLAIM, 0); return !adf_vf_stop_wq ? -EFAULT : 0; } diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 1e8852a8a057..769148dbaeb3 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -947,13 +947,13 @@ static int qat_alg_ablkcipher_setkey(struct crypto_ablkcipher *tfm, return 0; out_free_all: - memset(ctx->dec_cd, 0, sizeof(*ctx->enc_cd)); - dma_free_coherent(dev, sizeof(*ctx->enc_cd), + memset(ctx->dec_cd, 0, sizeof(*ctx->dec_cd)); + dma_free_coherent(dev, sizeof(*ctx->dec_cd), ctx->dec_cd, ctx->dec_cd_paddr); ctx->dec_cd = NULL; out_free_enc: - memset(ctx->enc_cd, 0, sizeof(*ctx->dec_cd)); - dma_free_coherent(dev, sizeof(*ctx->dec_cd), + memset(ctx->enc_cd, 0, sizeof(*ctx->enc_cd)); + dma_free_coherent(dev, sizeof(*ctx->enc_cd), ctx->enc_cd, ctx->enc_cd_paddr); ctx->enc_cd = NULL; return -ENOMEM; diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 05f49d4f94b2..0d35dca2e925 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -49,11 +49,12 @@ #include #include #include +#include +#include +#include #include #include #include -#include "qat_rsapubkey-asn1.h" -#include "qat_rsaprivkey-asn1.h" #include "icp_qat_fw_pke.h" #include "adf_accel_devices.h" #include "adf_transport.h" @@ -75,6 +76,14 @@ struct qat_rsa_input_params { dma_addr_t d; dma_addr_t n; } dec; + struct { + dma_addr_t c; + dma_addr_t p; + dma_addr_t q; + dma_addr_t dp; + dma_addr_t dq; + dma_addr_t qinv; + } dec_crt; u64 in_tab[8]; }; } __packed __aligned(64); @@ -95,71 +104,480 @@ struct qat_rsa_ctx { char *n; char *e; char *d; + char *p; + char *q; + char *dp; + char *dq; + char *qinv; dma_addr_t dma_n; dma_addr_t dma_e; dma_addr_t dma_d; + dma_addr_t dma_p; + 
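A gloss on the qat_algs.c error-path hunk above, as I read the diff:

/*
 * The unwind code wiped and freed ctx->dec_cd using
 * sizeof(*ctx->enc_cd) and vice versa.  The two descriptors are the
 * same type, so the mismatch was latent rather than a live bug, but
 * each buffer is now consistently paired with its own sizeof().
 */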
dma_addr_t dma_q; + dma_addr_t dma_dp; + dma_addr_t dma_dq; + dma_addr_t dma_qinv; unsigned int key_sz; + bool crt_mode; + struct qat_crypto_instance *inst; +} __packed __aligned(64); + +struct qat_dh_input_params { + union { + struct { + dma_addr_t b; + dma_addr_t xa; + dma_addr_t p; + } in; + struct { + dma_addr_t xa; + dma_addr_t p; + } in_g2; + u64 in_tab[8]; + }; +} __packed __aligned(64); + +struct qat_dh_output_params { + union { + dma_addr_t r; + u64 out_tab[8]; + }; +} __packed __aligned(64); + +struct qat_dh_ctx { + char *g; + char *xa; + char *p; + dma_addr_t dma_g; + dma_addr_t dma_xa; + dma_addr_t dma_p; + unsigned int p_size; + bool g2; struct qat_crypto_instance *inst; } __packed __aligned(64); -struct qat_rsa_request { - struct qat_rsa_input_params in; - struct qat_rsa_output_params out; +struct qat_asym_request { + union { + struct qat_rsa_input_params rsa; + struct qat_dh_input_params dh; + } in; + union { + struct qat_rsa_output_params rsa; + struct qat_dh_output_params dh; + } out; dma_addr_t phy_in; dma_addr_t phy_out; char *src_align; char *dst_align; struct icp_qat_fw_pke_request req; - struct qat_rsa_ctx *ctx; + union { + struct qat_rsa_ctx *rsa; + struct qat_dh_ctx *dh; + } ctx; + union { + struct akcipher_request *rsa; + struct kpp_request *dh; + } areq; int err; + void (*cb)(struct icp_qat_fw_pke_resp *resp); } __aligned(64); -static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) +static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp) { - struct akcipher_request *areq = (void *)(__force long)resp->opaque; - struct qat_rsa_request *req = PTR_ALIGN(akcipher_request_ctx(areq), 64); - struct device *dev = &GET_DEV(req->ctx->inst->accel_dev); + struct qat_asym_request *req = (void *)(__force long)resp->opaque; + struct kpp_request *areq = req->areq.dh; + struct device *dev = &GET_DEV(req->ctx.dh->inst->accel_dev); int err = ICP_QAT_FW_PKE_RESP_PKE_STAT_GET( resp->pke_resp_hdr.comn_resp_flags); err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 
0 : -EINVAL; - if (req->src_align) - dma_free_coherent(dev, req->ctx->key_sz, req->src_align, - req->in.enc.m); - else - dma_unmap_single(dev, req->in.enc.m, req->ctx->key_sz, - DMA_TO_DEVICE); + if (areq->src) { + if (req->src_align) + dma_free_coherent(dev, req->ctx.dh->p_size, + req->src_align, req->in.dh.in.b); + else + dma_unmap_single(dev, req->in.dh.in.b, + req->ctx.dh->p_size, DMA_TO_DEVICE); + } - areq->dst_len = req->ctx->key_sz; + areq->dst_len = req->ctx.dh->p_size; if (req->dst_align) { - char *ptr = req->dst_align; + scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, + areq->dst_len, 1); - while (!(*ptr) && areq->dst_len) { - areq->dst_len--; - ptr++; - } + dma_free_coherent(dev, req->ctx.dh->p_size, req->dst_align, + req->out.dh.r); + } else { + dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size, + DMA_FROM_DEVICE); + } - if (areq->dst_len != req->ctx->key_sz) - memmove(req->dst_align, ptr, areq->dst_len); + dma_unmap_single(dev, req->phy_in, sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); + dma_unmap_single(dev, req->phy_out, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); - scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, - areq->dst_len, 1); + kpp_request_complete(areq, err); +} + +#define PKE_DH_1536 0x390c1a49 +#define PKE_DH_G2_1536 0x2e0b1a3e +#define PKE_DH_2048 0x4d0c1a60 +#define PKE_DH_G2_2048 0x3e0b1a55 +#define PKE_DH_3072 0x510c1a77 +#define PKE_DH_G2_3072 0x3a0b1a6c +#define PKE_DH_4096 0x690c1a8e +#define PKE_DH_G2_4096 0x4a0b1a83 + +static unsigned long qat_dh_fn_id(unsigned int len, bool g2) +{ + unsigned int bitslen = len << 3; + + switch (bitslen) { + case 1536: + return g2 ? PKE_DH_G2_1536 : PKE_DH_1536; + case 2048: + return g2 ? PKE_DH_G2_2048 : PKE_DH_2048; + case 3072: + return g2 ? PKE_DH_G2_3072 : PKE_DH_3072; + case 4096: + return g2 ? 
PKE_DH_G2_4096 : PKE_DH_4096; + default: + return 0; + }; +} + +static inline struct qat_dh_ctx *qat_dh_get_params(struct crypto_kpp *tfm) +{ + return kpp_tfm_ctx(tfm); +} + +static int qat_dh_compute_value(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + struct qat_asym_request *qat_req = + PTR_ALIGN(kpp_request_ctx(req), 64); + struct icp_qat_fw_pke_request *msg = &qat_req->req; + int ret, ctr = 0; + int n_input_params = 0; + + if (unlikely(!ctx->xa)) + return -EINVAL; + + if (req->dst_len < ctx->p_size) { + req->dst_len = ctx->p_size; + return -EOVERFLOW; + } + memset(msg, '\0', sizeof(*msg)); + ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, + ICP_QAT_FW_COMN_REQ_FLAG_SET); + + msg->pke_hdr.cd_pars.func_id = qat_dh_fn_id(ctx->p_size, + !req->src && ctx->g2); + if (unlikely(!msg->pke_hdr.cd_pars.func_id)) + return -EINVAL; + + qat_req->cb = qat_dh_cb; + qat_req->ctx.dh = ctx; + qat_req->areq.dh = req; + msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; + msg->pke_hdr.comn_req_flags = + ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, + QAT_COMN_CD_FLD_TYPE_64BIT_ADR); - dma_free_coherent(dev, req->ctx->key_sz, req->dst_align, - req->out.enc.c); + /* + * If no source is provided use g as base + */ + if (req->src) { + qat_req->in.dh.in.xa = ctx->dma_xa; + qat_req->in.dh.in.p = ctx->dma_p; + n_input_params = 3; } else { - char *ptr = sg_virt(areq->dst); + if (ctx->g2) { + qat_req->in.dh.in_g2.xa = ctx->dma_xa; + qat_req->in.dh.in_g2.p = ctx->dma_p; + n_input_params = 2; + } else { + qat_req->in.dh.in.b = ctx->dma_g; + qat_req->in.dh.in.xa = ctx->dma_xa; + qat_req->in.dh.in.p = ctx->dma_p; + n_input_params = 3; + } + } - while (!(*ptr) && areq->dst_len) { - areq->dst_len--; - ptr++; + ret = -ENOMEM; + if (req->src) { + /* + * src can be of any size in valid range, but HW expects it to + * be the same as modulo p so in case it is different we need + * to allocate a new buf and copy src data. + * In other case we just need to map the user provided buffer. + * Also need to make sure that it is in contiguous buffer. + */ + if (sg_is_last(req->src) && req->src_len == ctx->p_size) { + qat_req->src_align = NULL; + qat_req->in.dh.in.b = dma_map_single(dev, + sg_virt(req->src), + req->src_len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, + qat_req->in.dh.in.b))) + return ret; + + } else { + int shift = ctx->p_size - req->src_len; + + qat_req->src_align = dma_zalloc_coherent(dev, + ctx->p_size, + &qat_req->in.dh.in.b, + GFP_KERNEL); + if (unlikely(!qat_req->src_align)) + return ret; + + scatterwalk_map_and_copy(qat_req->src_align + shift, + req->src, 0, req->src_len, 0); } + } + /* + * dst can be of any size in valid range, but HW expects it to be the + * same as modulo m so in case it is different we need to allocate a + * new buf and copy src data. + * In other case we just need to map the user provided buffer. + * Also need to make sure that it is in contiguous buffer. 
+ */ + if (sg_is_last(req->dst) && req->dst_len == ctx->p_size) { + qat_req->dst_align = NULL; + qat_req->out.dh.r = dma_map_single(dev, sg_virt(req->dst), + req->dst_len, + DMA_FROM_DEVICE); - if (sg_virt(areq->dst) != ptr && areq->dst_len) - memmove(sg_virt(areq->dst), ptr, areq->dst_len); + if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r))) + goto unmap_src; + + } else { + qat_req->dst_align = dma_zalloc_coherent(dev, ctx->p_size, + &qat_req->out.dh.r, + GFP_KERNEL); + if (unlikely(!qat_req->dst_align)) + goto unmap_src; + } - dma_unmap_single(dev, req->out.enc.c, req->ctx->key_sz, + qat_req->in.dh.in_tab[n_input_params] = 0; + qat_req->out.dh.out_tab[1] = 0; + /* Mapping in.in.b or in.in_g2.xa is the same */ + qat_req->phy_in = dma_map_single(dev, &qat_req->in.dh.in.b, + sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) + goto unmap_dst; + + qat_req->phy_out = dma_map_single(dev, &qat_req->out.dh.r, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) + goto unmap_in_params; + + msg->pke_mid.src_data_addr = qat_req->phy_in; + msg->pke_mid.dest_data_addr = qat_req->phy_out; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; + msg->input_param_count = n_input_params; + msg->output_param_count = 1; + + do { + ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg); + } while (ret == -EBUSY && ctr++ < 100); + + if (!ret) + return -EINPROGRESS; + + if (!dma_mapping_error(dev, qat_req->phy_out)) + dma_unmap_single(dev, qat_req->phy_out, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); +unmap_in_params: + if (!dma_mapping_error(dev, qat_req->phy_in)) + dma_unmap_single(dev, qat_req->phy_in, + sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); +unmap_dst: + if (qat_req->dst_align) + dma_free_coherent(dev, ctx->p_size, qat_req->dst_align, + qat_req->out.dh.r); + else + if (!dma_mapping_error(dev, qat_req->out.dh.r)) + dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size, + DMA_FROM_DEVICE); +unmap_src: + if (req->src) { + if (qat_req->src_align) + dma_free_coherent(dev, ctx->p_size, qat_req->src_align, + qat_req->in.dh.in.b); + else + if (!dma_mapping_error(dev, qat_req->in.dh.in.b)) + dma_unmap_single(dev, qat_req->in.dh.in.b, + ctx->p_size, + DMA_TO_DEVICE); + } + return ret; +} + +static int qat_dh_check_params_length(unsigned int p_len) +{ + switch (p_len) { + case 1536: + case 2048: + case 3072: + case 4096: + return 0; + } + return -EINVAL; +} + +static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params) +{ + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + + if (unlikely(!params->p || !params->g)) + return -EINVAL; + + if (qat_dh_check_params_length(params->p_size << 3)) + return -EINVAL; + + ctx->p_size = params->p_size; + ctx->p = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_p, GFP_KERNEL); + if (!ctx->p) + return -ENOMEM; + memcpy(ctx->p, params->p, ctx->p_size); + + /* If g equals 2 don't copy it */ + if (params->g_size == 1 && *(char *)params->g == 0x02) { + ctx->g2 = true; + return 0; + } + + ctx->g = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_g, GFP_KERNEL); + if (!ctx->g) { + dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); + ctx->p = NULL; + return -ENOMEM; + } + memcpy(ctx->g + (ctx->p_size - params->g_size), params->g, + params->g_size); + + return 0; +} + +static void qat_dh_clear_ctx(struct device *dev, struct qat_dh_ctx *ctx) +{ + if (ctx->g) { + 
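qat_dh_compute_value() above left-pads a short big-endian operand to the modulus size before the firmware sees it. That core step in isolation; demo_pad_to_modulus and its parameters are invented:

#include <linux/dma-mapping.h>
#include <crypto/scatterwalk.h>

/*
 * Copy a src_len-byte big-endian value out of a scatterlist into a
 * zeroed p_size-byte coherent buffer, right-aligned, so the device
 * always receives exactly p_size bytes.  Assumes src_len <= p_size.
 */
static void *demo_pad_to_modulus(struct device *dev,
                                 struct scatterlist *src,
                                 unsigned int src_len,
                                 unsigned int p_size,
                                 dma_addr_t *dma)
{
        void *buf = dma_zalloc_coherent(dev, p_size, dma, GFP_KERNEL);

        if (buf)
                scatterwalk_map_and_copy(buf + (p_size - src_len),
                                         src, 0, src_len, 0);
        return buf;
}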
dma_free_coherent(dev, ctx->p_size, ctx->g, ctx->dma_g); + ctx->g = NULL; + } + if (ctx->xa) { + dma_free_coherent(dev, ctx->p_size, ctx->xa, ctx->dma_xa); + ctx->xa = NULL; + } + if (ctx->p) { + dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); + ctx->p = NULL; + } + ctx->p_size = 0; + ctx->g2 = false; +} + +static int qat_dh_set_secret(struct crypto_kpp *tfm, void *buf, + unsigned int len) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + struct dh params; + int ret; + + if (crypto_dh_decode_key(buf, len, ¶ms) < 0) + return -EINVAL; + + /* Free old secret if any */ + qat_dh_clear_ctx(dev, ctx); + + ret = qat_dh_set_params(ctx, ¶ms); + if (ret < 0) + return ret; + + ctx->xa = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_xa, + GFP_KERNEL); + if (!ctx->xa) { + qat_dh_clear_ctx(dev, ctx); + return -ENOMEM; + } + memcpy(ctx->xa + (ctx->p_size - params.key_size), params.key, + params.key_size); + + return 0; +} + +static int qat_dh_max_size(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + + return ctx->p ? ctx->p_size : -EINVAL; +} + +static int qat_dh_init_tfm(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct qat_crypto_instance *inst = + qat_crypto_get_instance_node(get_current_node()); + + if (!inst) + return -EINVAL; + + ctx->p_size = 0; + ctx->g2 = false; + ctx->inst = inst; + return 0; +} + +static void qat_dh_exit_tfm(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + + qat_dh_clear_ctx(dev, ctx); + qat_crypto_put_instance(ctx->inst); +} + +static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) +{ + struct qat_asym_request *req = (void *)(__force long)resp->opaque; + struct akcipher_request *areq = req->areq.rsa; + struct device *dev = &GET_DEV(req->ctx.rsa->inst->accel_dev); + int err = ICP_QAT_FW_PKE_RESP_PKE_STAT_GET( + resp->pke_resp_hdr.comn_resp_flags); + + err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 
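qat_dh_set_secret() above consumes the packed key produced by the new DH helpers. A hedged sketch of the consumer side of the "dh" kpp algorithm this file registers; demo_dh_public_key is invented, and a real caller would also set a completion callback since an async implementation may return -EINPROGRESS:

#include <crypto/dh.h>
#include <crypto/kpp.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int demo_dh_public_key(struct dh *params,
                              struct scatterlist *out, unsigned int out_len)
{
        struct crypto_kpp *tfm;
        struct kpp_request *req;
        char *packed;
        int len, err;

        tfm = crypto_alloc_kpp("dh", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        /* Serialize p, g and the private key into one buffer ... */
        len = crypto_dh_key_len(params);
        packed = kmalloc(len, GFP_KERNEL);
        if (!packed) {
                err = -ENOMEM;
                goto free_tfm;
        }
        err = crypto_dh_encode_key(packed, len, params);
        if (err)
                goto free_packed;

        /* ... which set_secret() unpacks via crypto_dh_decode_key(). */
        err = crypto_kpp_set_secret(tfm, packed, len);
        if (err)
                goto free_packed;

        req = kpp_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                err = -ENOMEM;
                goto free_packed;
        }
        /* No input: base g (or the g == 2 shortcut) is used. */
        kpp_request_set_input(req, NULL, 0);
        kpp_request_set_output(req, out, out_len);
        err = crypto_kpp_generate_public_key(req);

        kpp_request_free(req);
free_packed:
        kfree(packed);
free_tfm:
        crypto_free_kpp(tfm);
        return err;
}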
0 : -EINVAL; + + if (req->src_align) + dma_free_coherent(dev, req->ctx.rsa->key_sz, req->src_align, + req->in.rsa.enc.m); + else + dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz, + DMA_TO_DEVICE); + + areq->dst_len = req->ctx.rsa->key_sz; + if (req->dst_align) { + scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, + areq->dst_len, 1); + + dma_free_coherent(dev, req->ctx.rsa->key_sz, req->dst_align, + req->out.rsa.enc.c); + } else { + dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz, DMA_FROM_DEVICE); } @@ -175,8 +593,9 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) void qat_alg_asym_callback(void *_resp) { struct icp_qat_fw_pke_resp *resp = _resp; + struct qat_asym_request *areq = (void *)(__force long)resp->opaque; - qat_rsa_cb(resp); + areq->cb(resp); } #define PKE_RSA_EP_512 0x1c161b21 @@ -237,13 +656,42 @@ static unsigned long qat_rsa_dec_fn_id(unsigned int len) }; } +#define PKE_RSA_DP2_512 0x1c131b57 +#define PKE_RSA_DP2_1024 0x26131c2d +#define PKE_RSA_DP2_1536 0x45111d12 +#define PKE_RSA_DP2_2048 0x59121dfa +#define PKE_RSA_DP2_3072 0x81121ed9 +#define PKE_RSA_DP2_4096 0xb1111fb2 + +static unsigned long qat_rsa_dec_fn_id_crt(unsigned int len) +{ + unsigned int bitslen = len << 3; + + switch (bitslen) { + case 512: + return PKE_RSA_DP2_512; + case 1024: + return PKE_RSA_DP2_1024; + case 1536: + return PKE_RSA_DP2_1536; + case 2048: + return PKE_RSA_DP2_2048; + case 3072: + return PKE_RSA_DP2_3072; + case 4096: + return PKE_RSA_DP2_4096; + default: + return 0; + }; +} + static int qat_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_rsa_request *qat_req = + struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; int ret, ctr = 0; @@ -262,14 +710,16 @@ static int qat_rsa_enc(struct akcipher_request *req) if (unlikely(!msg->pke_hdr.cd_pars.func_id)) return -EINVAL; - qat_req->ctx = ctx; + qat_req->cb = qat_rsa_cb; + qat_req->ctx.rsa = ctx; + qat_req->areq.rsa = req; msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; msg->pke_hdr.comn_req_flags = ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, QAT_COMN_CD_FLD_TYPE_64BIT_ADR); - qat_req->in.enc.e = ctx->dma_e; - qat_req->in.enc.n = ctx->dma_n; + qat_req->in.rsa.enc.e = ctx->dma_e; + qat_req->in.rsa.enc.n = ctx->dma_n; ret = -ENOMEM; /* @@ -281,16 +731,16 @@ static int qat_rsa_enc(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.enc.m = dma_map_single(dev, sg_virt(req->src), + qat_req->in.rsa.enc.m = dma_map_single(dev, sg_virt(req->src), req->src_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.enc.m))) + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m))) return ret; } else { int shift = ctx->key_sz - req->src_len; qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->in.enc.m, + &qat_req->in.rsa.enc.m, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; @@ -300,30 +750,30 @@ static int qat_rsa_enc(struct akcipher_request *req) } if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { qat_req->dst_align = NULL; - qat_req->out.enc.c = dma_map_single(dev, sg_virt(req->dst), - req->dst_len, - DMA_FROM_DEVICE); + qat_req->out.rsa.enc.c = dma_map_single(dev, 
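qat_alg_asym_callback() above now dispatches through a per-request function pointer carried in the firmware's opaque word, which is what lets RSA and DH share one ring handler. The pattern in isolation; all names are invented:

#include <linux/types.h>

struct demo_fw_resp {
        u64 opaque;             /* echoed back verbatim by the device */
        /* ... status fields ... */
};

struct demo_async_req {
        void (*cb)(struct demo_fw_resp *resp);
        /* ... RSA- or DH-specific per-request state ... */
};

/*
 * One ring callback for every asymmetric service: the request names
 * its own completion handler, so adding DH beside RSA needed neither
 * a second handler nor a type switch here.
 */
static void demo_ring_callback(void *_resp)
{
        struct demo_fw_resp *resp = _resp;
        struct demo_async_req *req = (void *)(__force long)resp->opaque;

        req->cb(resp);
}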
sg_virt(req->dst), + req->dst_len, + DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->out.enc.c))) + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c))) goto unmap_src; } else { qat_req->dst_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->out.enc.c, + &qat_req->out.rsa.enc.c, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; } - qat_req->in.in_tab[3] = 0; - qat_req->out.out_tab[1] = 0; - qat_req->phy_in = dma_map_single(dev, &qat_req->in.enc.m, + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m, sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; - qat_req->phy_out = dma_map_single(dev, &qat_req->out.enc.c, + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.enc.c, sizeof(struct qat_rsa_output_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) @@ -331,7 +781,7 @@ static int qat_rsa_enc(struct akcipher_request *req) msg->pke_mid.src_data_addr = qat_req->phy_in; msg->pke_mid.dest_data_addr = qat_req->phy_out; - msg->pke_mid.opaque = (uint64_t)(__force long)req; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; msg->input_param_count = 3; msg->output_param_count = 1; do { @@ -353,19 +803,19 @@ unmap_in_params: unmap_dst: if (qat_req->dst_align) dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.enc.c); + qat_req->out.rsa.enc.c); else - if (!dma_mapping_error(dev, qat_req->out.enc.c)) - dma_unmap_single(dev, qat_req->out.enc.c, ctx->key_sz, - DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c)) + dma_unmap_single(dev, qat_req->out.rsa.enc.c, + ctx->key_sz, DMA_FROM_DEVICE); unmap_src: if (qat_req->src_align) dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.enc.m); + qat_req->in.rsa.enc.m); else - if (!dma_mapping_error(dev, qat_req->in.enc.m)) - dma_unmap_single(dev, qat_req->in.enc.m, ctx->key_sz, - DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m)) + dma_unmap_single(dev, qat_req->in.rsa.enc.m, + ctx->key_sz, DMA_TO_DEVICE); return ret; } @@ -375,7 +825,7 @@ static int qat_rsa_dec(struct akcipher_request *req) struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_rsa_request *qat_req = + struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; int ret, ctr = 0; @@ -390,18 +840,30 @@ static int qat_rsa_dec(struct akcipher_request *req) memset(msg, '\0', sizeof(*msg)); ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, ICP_QAT_FW_COMN_REQ_FLAG_SET); - msg->pke_hdr.cd_pars.func_id = qat_rsa_dec_fn_id(ctx->key_sz); + msg->pke_hdr.cd_pars.func_id = ctx->crt_mode ? 
+ qat_rsa_dec_fn_id_crt(ctx->key_sz) : + qat_rsa_dec_fn_id(ctx->key_sz); if (unlikely(!msg->pke_hdr.cd_pars.func_id)) return -EINVAL; - qat_req->ctx = ctx; + qat_req->cb = qat_rsa_cb; + qat_req->ctx.rsa = ctx; + qat_req->areq.rsa = req; msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; msg->pke_hdr.comn_req_flags = ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, QAT_COMN_CD_FLD_TYPE_64BIT_ADR); - qat_req->in.dec.d = ctx->dma_d; - qat_req->in.dec.n = ctx->dma_n; + if (ctx->crt_mode) { + qat_req->in.rsa.dec_crt.p = ctx->dma_p; + qat_req->in.rsa.dec_crt.q = ctx->dma_q; + qat_req->in.rsa.dec_crt.dp = ctx->dma_dp; + qat_req->in.rsa.dec_crt.dq = ctx->dma_dq; + qat_req->in.rsa.dec_crt.qinv = ctx->dma_qinv; + } else { + qat_req->in.rsa.dec.d = ctx->dma_d; + qat_req->in.rsa.dec.n = ctx->dma_n; + } ret = -ENOMEM; /* @@ -413,16 +875,16 @@ static int qat_rsa_dec(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.dec.c = dma_map_single(dev, sg_virt(req->src), + qat_req->in.rsa.dec.c = dma_map_single(dev, sg_virt(req->src), req->dst_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.dec.c))) + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c))) return ret; } else { int shift = ctx->key_sz - req->src_len; qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->in.dec.c, + &qat_req->in.rsa.dec.c, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; @@ -432,31 +894,34 @@ static int qat_rsa_dec(struct akcipher_request *req) } if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { qat_req->dst_align = NULL; - qat_req->out.dec.m = dma_map_single(dev, sg_virt(req->dst), + qat_req->out.rsa.dec.m = dma_map_single(dev, sg_virt(req->dst), req->dst_len, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->out.dec.m))) + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m))) goto unmap_src; } else { qat_req->dst_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->out.dec.m, + &qat_req->out.rsa.dec.m, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; } - qat_req->in.in_tab[3] = 0; - qat_req->out.out_tab[1] = 0; - qat_req->phy_in = dma_map_single(dev, &qat_req->in.dec.c, + if (ctx->crt_mode) + qat_req->in.rsa.in_tab[6] = 0; + else + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.dec.c, sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; - qat_req->phy_out = dma_map_single(dev, &qat_req->out.dec.m, + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.dec.m, sizeof(struct qat_rsa_output_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) @@ -464,8 +929,12 @@ static int qat_rsa_dec(struct akcipher_request *req) msg->pke_mid.src_data_addr = qat_req->phy_in; msg->pke_mid.dest_data_addr = qat_req->phy_out; - msg->pke_mid.opaque = (uint64_t)(__force long)req; - msg->input_param_count = 3; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; + if (ctx->crt_mode) + msg->input_param_count = 6; + else + msg->input_param_count = 3; + msg->output_param_count = 1; do { ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg); @@ -486,26 +955,24 @@ unmap_in_params: unmap_dst: if (qat_req->dst_align) dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.dec.m); + qat_req->out.rsa.dec.m); else - if (!dma_mapping_error(dev, qat_req->out.dec.m)) - 
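Background for the dec_crt wiring above; this is textbook RSA-CRT rather than anything stated in the patch:

/*
 * With dp = d mod (p-1), dq = d mod (q-1) and qinv = q^-1 mod p,
 * decrypting c splits into two half-size exponentiations:
 *
 *      m1 = c^dp mod p
 *      m2 = c^dq mod q
 *      h  = qinv * (m1 - m2) mod p
 *      m  = m2 + h * q
 *
 * Hence the CRT request passes six half-size operands
 * (c, p, q, dp, dq, qinv) with input_param_count = 6, where the
 * non-CRT path passes (c, d, n) with a count of 3.
 */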
dma_unmap_single(dev, qat_req->out.dec.m, ctx->key_sz, - DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m)) + dma_unmap_single(dev, qat_req->out.rsa.dec.m, + ctx->key_sz, DMA_FROM_DEVICE); unmap_src: if (qat_req->src_align) dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.dec.c); + qat_req->in.rsa.dec.c); else - if (!dma_mapping_error(dev, qat_req->in.dec.c)) - dma_unmap_single(dev, qat_req->in.dec.c, ctx->key_sz, - DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c)) + dma_unmap_single(dev, qat_req->in.rsa.dec.c, + ctx->key_sz, DMA_TO_DEVICE); return ret; } -int qat_rsa_get_n(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_n(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -518,11 +985,6 @@ int qat_rsa_get_n(void *context, size_t hdrlen, unsigned char tag, ctx->key_sz = vlen; ret = -EINVAL; - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (ctx->key_sz != 256 && ctx->key_sz != 384)) { - pr_err("QAT: RSA: key size not allowed in FIPS mode\n"); - goto err; - } /* invalid key size provided */ if (!qat_rsa_enc_fn_id(ctx->key_sz)) goto err; @@ -540,10 +1002,8 @@ err: return ret; } -int qat_rsa_get_e(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_e(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -559,18 +1019,15 @@ int qat_rsa_get_e(void *context, size_t hdrlen, unsigned char tag, } ctx->e = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_e, GFP_KERNEL); - if (!ctx->e) { - ctx->e = NULL; + if (!ctx->e) return -ENOMEM; - } + memcpy(ctx->e + (ctx->key_sz - vlen), ptr, vlen); return 0; } -int qat_rsa_get_d(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_d(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -585,12 +1042,6 @@ int qat_rsa_get_d(void *context, size_t hdrlen, unsigned char tag, if (!ctx->key_sz || !vlen || vlen > ctx->key_sz) goto err; - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (vlen != 256 && vlen != 384)) { - pr_err("QAT: RSA: key size not allowed in FIPS mode\n"); - goto err; - } - ret = -ENOMEM; ctx->d = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_d, GFP_KERNEL); if (!ctx->d) @@ -603,12 +1054,106 @@ err: return ret; } -static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, - unsigned int keylen, bool private) +static void qat_rsa_drop_leading_zeros(const char **ptr, unsigned int *len) { - struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); - struct device *dev = &GET_DEV(ctx->inst->accel_dev); - int ret; + while (!**ptr && *len) { + (*ptr)++; + (*len)--; + } +} + +static void qat_rsa_setkey_crt(struct qat_rsa_ctx *ctx, struct rsa_key *rsa_key) +{ + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + const char *ptr; + unsigned int len; + unsigned int half_key_sz = ctx->key_sz / 2; + + /* p */ + ptr = rsa_key->p; + len = rsa_key->p_sz; + 
qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto err; + ctx->p = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_p, GFP_KERNEL); + if (!ctx->p) + goto err; + memcpy(ctx->p + (half_key_sz - len), ptr, len); + + /* q */ + ptr = rsa_key->q; + len = rsa_key->q_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_p; + ctx->q = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_q, GFP_KERNEL); + if (!ctx->q) + goto free_p; + memcpy(ctx->q + (half_key_sz - len), ptr, len); + + /* dp */ + ptr = rsa_key->dp; + len = rsa_key->dp_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_q; + ctx->dp = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_dp, + GFP_KERNEL); + if (!ctx->dp) + goto free_q; + memcpy(ctx->dp + (half_key_sz - len), ptr, len); + + /* dq */ + ptr = rsa_key->dq; + len = rsa_key->dq_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_dp; + ctx->dq = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_dq, + GFP_KERNEL); + if (!ctx->dq) + goto free_dp; + memcpy(ctx->dq + (half_key_sz - len), ptr, len); + + /* qinv */ + ptr = rsa_key->qinv; + len = rsa_key->qinv_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_dq; + ctx->qinv = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_qinv, + GFP_KERNEL); + if (!ctx->qinv) + goto free_dq; + memcpy(ctx->qinv + (half_key_sz - len), ptr, len); + + ctx->crt_mode = true; + return; + +free_dq: + memset(ctx->dq, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dq, ctx->dma_dq); + ctx->dq = NULL; +free_dp: + memset(ctx->dp, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dp, ctx->dma_dp); + ctx->dp = NULL; +free_q: + memset(ctx->q, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->q, ctx->dma_q); + ctx->q = NULL; +free_p: + memset(ctx->p, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->p, ctx->dma_p); + ctx->p = NULL; +err: + ctx->crt_mode = false; +} + +static void qat_rsa_clear_ctx(struct device *dev, struct qat_rsa_ctx *ctx) +{ + unsigned int half_key_sz = ctx->key_sz / 2; /* Free the old key if any */ if (ctx->n) @@ -619,19 +1164,68 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, memset(ctx->d, '\0', ctx->key_sz); dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d); } + if (ctx->p) { + memset(ctx->p, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->p, ctx->dma_p); + } + if (ctx->q) { + memset(ctx->q, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->q, ctx->dma_q); + } + if (ctx->dp) { + memset(ctx->dp, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dp, ctx->dma_dp); + } + if (ctx->dq) { + memset(ctx->dq, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dq, ctx->dma_dq); + } + if (ctx->qinv) { + memset(ctx->qinv, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->qinv, ctx->dma_qinv); + } ctx->n = NULL; ctx->e = NULL; ctx->d = NULL; + ctx->p = NULL; + ctx->q = NULL; + ctx->dp = NULL; + ctx->dq = NULL; + ctx->qinv = NULL; + ctx->crt_mode = false; + ctx->key_sz = 0; +} + +static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen, bool private) +{ + struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + struct rsa_key rsa_key; + int ret; + + qat_rsa_clear_ctx(dev, ctx); if (private) - ret = asn1_ber_decoder(&qat_rsaprivkey_decoder, ctx, key, - keylen); + ret = rsa_parse_priv_key(&rsa_key, key, keylen); else - ret = 
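The setkey rework just below replaces the driver-local ASN.1 decoders with the common rsa_helper parsers. A sketch of that interface, assuming <crypto/internal/rsa.h> as in the patch; demo_parse is invented:

#include <crypto/internal/rsa.h>

static int demo_parse(const void *key, unsigned int keylen, bool private)
{
        struct rsa_key rsa_key = {};
        int ret;

        /*
         * The parsers only record pointers into 'key' plus lengths;
         * the caller copies n, e, d and the CRT factors out itself.
         */
        if (private)
                ret = rsa_parse_priv_key(&rsa_key, key, keylen);
        else
                ret = rsa_parse_pub_key(&rsa_key, key, keylen);
        if (ret < 0)
                return ret;

        /* e.g. rsa_key.n / rsa_key.n_sz, rsa_key.p / rsa_key.p_sz, ... */
        return 0;
}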
asn1_ber_decoder(&qat_rsapubkey_decoder, ctx, key, - keylen); + ret = rsa_parse_pub_key(&rsa_key, key, keylen); + if (ret < 0) + goto free; + + ret = qat_rsa_set_n(ctx, rsa_key.n, rsa_key.n_sz); if (ret < 0) goto free; + ret = qat_rsa_set_e(ctx, rsa_key.e, rsa_key.e_sz); + if (ret < 0) + goto free; + if (private) { + ret = qat_rsa_set_d(ctx, rsa_key.d, rsa_key.d_sz); + if (ret < 0) + goto free; + qat_rsa_setkey_crt(ctx, &rsa_key); + } if (!ctx->n || !ctx->e) { /* invalid key provided */ @@ -646,20 +1240,7 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, return 0; free: - if (ctx->d) { - memset(ctx->d, '\0', ctx->key_sz); - dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d); - ctx->d = NULL; - } - if (ctx->e) { - dma_free_coherent(dev, ctx->key_sz, ctx->e, ctx->dma_e); - ctx->e = NULL; - } - if (ctx->n) { - dma_free_coherent(dev, ctx->key_sz, ctx->n, ctx->dma_n); - ctx->n = NULL; - ctx->key_sz = 0; - } + qat_rsa_clear_ctx(dev, ctx); return ret; } @@ -725,7 +1306,7 @@ static struct akcipher_alg rsa = { .max_size = qat_rsa_max_size, .init = qat_rsa_init_tfm, .exit = qat_rsa_exit_tfm, - .reqsize = sizeof(struct qat_rsa_request) + 64, + .reqsize = sizeof(struct qat_asym_request) + 64, .base = { .cra_name = "rsa", .cra_driver_name = "qat-rsa", @@ -735,6 +1316,23 @@ static struct akcipher_alg rsa = { }, }; +static struct kpp_alg dh = { + .set_secret = qat_dh_set_secret, + .generate_public_key = qat_dh_compute_value, + .compute_shared_secret = qat_dh_compute_value, + .max_size = qat_dh_max_size, + .init = qat_dh_init_tfm, + .exit = qat_dh_exit_tfm, + .reqsize = sizeof(struct qat_asym_request) + 64, + .base = { + .cra_name = "dh", + .cra_driver_name = "qat-dh", + .cra_priority = 1000, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct qat_dh_ctx), + }, +}; + int qat_asym_algs_register(void) { int ret = 0; @@ -743,7 +1341,11 @@ int qat_asym_algs_register(void) if (++active_devs == 1) { rsa.base.cra_flags = 0; ret = crypto_register_akcipher(&rsa); + if (ret) + goto unlock; + ret = crypto_register_kpp(&dh); } +unlock: mutex_unlock(&algs_lock); return ret; } @@ -751,7 +1353,9 @@ int qat_asym_algs_register(void) void qat_asym_algs_unregister(void) { mutex_lock(&algs_lock); - if (--active_devs == 0) + if (--active_devs == 0) { crypto_unregister_akcipher(&rsa); + crypto_unregister_kpp(&dh); + } mutex_unlock(&algs_lock); } diff --git a/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 b/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 deleted file mode 100644 index f0066adb79b8..000000000000 --- a/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 +++ /dev/null @@ -1,11 +0,0 @@ -RsaPrivKey ::= SEQUENCE { - version INTEGER, - n INTEGER ({ qat_rsa_get_n }), - e INTEGER ({ qat_rsa_get_e }), - d INTEGER ({ qat_rsa_get_d }), - prime1 INTEGER, - prime2 INTEGER, - exponent1 INTEGER, - exponent2 INTEGER, - coefficient INTEGER -} diff --git a/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 b/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 deleted file mode 100644 index bd667b31a21a..000000000000 --- a/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 +++ /dev/null @@ -1,4 +0,0 @@ -RsaPubKey ::= SEQUENCE { - n INTEGER ({ qat_rsa_get_n }), - e INTEGER ({ qat_rsa_get_e }) -} diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 6e1d5e185526..1dfcab317bed 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -252,6 +252,7 @@ 
void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_sbr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c index dbcbbe242bd6..b04b42f48366 100644 --- a/drivers/crypto/qce/ablkcipher.c +++ b/drivers/crypto/qce/ablkcipher.c @@ -15,8 +15,8 @@ #include #include #include -#include #include +#include #include "cipher.h" @@ -189,7 +189,7 @@ static int qce_ablkcipher_setkey(struct crypto_ablkcipher *ablk, const u8 *key, memcpy(ctx->enc_key, key, keylen); return 0; fallback: - ret = crypto_ablkcipher_setkey(ctx->fallback, key, keylen); + ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); if (!ret) ctx->enc_keylen = keylen; return ret; @@ -212,10 +212,16 @@ static int qce_ablkcipher_crypt(struct ablkcipher_request *req, int encrypt) if (IS_AES(rctx->flags) && ctx->enc_keylen != AES_KEYSIZE_128 && ctx->enc_keylen != AES_KEYSIZE_256) { - ablkcipher_request_set_tfm(req, ctx->fallback); - ret = encrypt ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + ret = encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return ret; } @@ -239,10 +245,9 @@ static int qce_ablkcipher_init(struct crypto_tfm *tfm) memset(ctx, 0, sizeof(*ctx)); tfm->crt_ablkcipher.reqsize = sizeof(struct qce_cipher_reqctx); - ctx->fallback = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), - CRYPTO_ALG_TYPE_ABLKCIPHER, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(tfm), 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback)) return PTR_ERR(ctx->fallback); @@ -253,7 +258,7 @@ static void qce_ablkcipher_exit(struct crypto_tfm *tfm) { struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_ablkcipher(ctx->fallback); + crypto_free_skcipher(ctx->fallback); } struct qce_ablkcipher_def { diff --git a/drivers/crypto/qce/cipher.h b/drivers/crypto/qce/cipher.h index 5c6a5f8633e5..2b0278bb6e92 100644 --- a/drivers/crypto/qce/cipher.h +++ b/drivers/crypto/qce/cipher.h @@ -22,7 +22,7 @@ struct qce_cipher_ctx { u8 enc_key[QCE_MAX_KEY_SIZE]; unsigned int enc_keylen; - struct crypto_ablkcipher *fallback; + struct crypto_skcipher *fallback; }; /** diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 2b3a0cfe3331..dce1af0ce85c 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -155,43 +155,43 @@ * expansion of its usage. 
*/ struct samsung_aes_variant { - unsigned int aes_offset; + unsigned int aes_offset; }; struct s5p_aes_reqctx { - unsigned long mode; + unsigned long mode; }; struct s5p_aes_ctx { - struct s5p_aes_dev *dev; + struct s5p_aes_dev *dev; - uint8_t aes_key[AES_MAX_KEY_SIZE]; - uint8_t nonce[CTR_RFC3686_NONCE_SIZE]; - int keylen; + uint8_t aes_key[AES_MAX_KEY_SIZE]; + uint8_t nonce[CTR_RFC3686_NONCE_SIZE]; + int keylen; }; struct s5p_aes_dev { - struct device *dev; - struct clk *clk; - void __iomem *ioaddr; - void __iomem *aes_ioaddr; - int irq_fc; + struct device *dev; + struct clk *clk; + void __iomem *ioaddr; + void __iomem *aes_ioaddr; + int irq_fc; - struct ablkcipher_request *req; - struct s5p_aes_ctx *ctx; - struct scatterlist *sg_src; - struct scatterlist *sg_dst; + struct ablkcipher_request *req; + struct s5p_aes_ctx *ctx; + struct scatterlist *sg_src; + struct scatterlist *sg_dst; /* In case of unaligned access: */ - struct scatterlist *sg_src_cpy; - struct scatterlist *sg_dst_cpy; + struct scatterlist *sg_src_cpy; + struct scatterlist *sg_dst_cpy; - struct tasklet_struct tasklet; - struct crypto_queue queue; - bool busy; - spinlock_t lock; + struct tasklet_struct tasklet; + struct crypto_queue queue; + bool busy; + spinlock_t lock; - struct samsung_aes_variant *variant; + struct samsung_aes_variant *variant; }; static struct s5p_aes_dev *s5p_dev; @@ -421,11 +421,11 @@ static bool s5p_aes_rx(struct s5p_aes_dev *dev) static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id) { struct platform_device *pdev = dev_id; - struct s5p_aes_dev *dev = platform_get_drvdata(pdev); - uint32_t status; - unsigned long flags; - bool set_dma_tx = false; - bool set_dma_rx = false; + struct s5p_aes_dev *dev = platform_get_drvdata(pdev); + bool set_dma_tx = false; + bool set_dma_rx = false; + unsigned long flags; + uint32_t status; spin_lock_irqsave(&dev->lock, flags); @@ -538,10 +538,10 @@ static int s5p_set_outdata_start(struct s5p_aes_dev *dev, static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) { - struct ablkcipher_request *req = dev->req; - uint32_t aes_control; - int err; - unsigned long flags; + struct ablkcipher_request *req = dev->req; + uint32_t aes_control; + unsigned long flags; + int err; aes_control = SSS_AES_KEY_CHANGE_MODE; if (mode & FLAGS_AES_DECRYPT) @@ -653,10 +653,10 @@ exit: static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode) { - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct s5p_aes_reqctx *reqctx = ablkcipher_request_ctx(req); - struct s5p_aes_dev *dev = ctx->dev; + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct s5p_aes_reqctx *reqctx = ablkcipher_request_ctx(req); + struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct s5p_aes_dev *dev = ctx->dev; if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) { dev_err(dev->dev, "request size is not exact amount of AES blocks\n"); @@ -671,7 +671,7 @@ static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode) static int s5p_aes_setkey(struct crypto_ablkcipher *cipher, const uint8_t *key, unsigned int keylen) { - struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); struct s5p_aes_ctx *ctx = crypto_tfm_ctx(tfm); if (keylen != AES_KEYSIZE_128 && @@ -763,11 +763,11 @@ static struct crypto_alg algs[] = { static int s5p_aes_probe(struct platform_device *pdev) { - int i, j, err = -ENODEV; - struct s5p_aes_dev 
*pdata; - struct device *dev = &pdev->dev; - struct resource *res; + struct device *dev = &pdev->dev; + int i, j, err = -ENODEV; struct samsung_aes_variant *variant; + struct s5p_aes_dev *pdata; + struct resource *res; if (s5p_dev) return -EEXIST; diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index c3f3d89e4831..0c49956ee0ce 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -14,10 +14,9 @@ * Based on omap-aes.c and tegra-aes.c */ -#include #include -#include #include +#include #include #include @@ -150,10 +149,7 @@ struct sahara_ctx { /* AES-specific context */ int keylen; u8 key[AES_KEYSIZE_128]; - struct crypto_ablkcipher *fallback; - - /* SHA-specific context */ - struct crypto_shash *shash_fallback; + struct crypto_skcipher *fallback; }; struct sahara_aes_reqctx { @@ -620,25 +616,21 @@ static int sahara_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, return 0; } - if (keylen != AES_KEYSIZE_128 && - keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_256) + if (keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_256) return -EINVAL; /* * The requested key size is not supported by HW, do a fallback. */ - ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - ctx->fallback->base.crt_flags |= - (tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); + crypto_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags & + CRYPTO_TFM_REQ_MASK); - ret = crypto_ablkcipher_setkey(ctx->fallback, key, keylen); - if (ret) { - struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm); + ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); - tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm_aux->crt_flags |= - (ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK); - } + tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->base.crt_flags |= crypto_skcipher_get_flags(ctx->fallback) & + CRYPTO_TFM_RES_MASK; return ret; } @@ -670,16 +662,20 @@ static int sahara_aes_crypt(struct ablkcipher_request *req, unsigned long mode) static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_encrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_encrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -688,16 +684,20 @@ static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req) static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + 
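A note on the sahara_aes_setkey() hunk above, going by the surrounding context lines:

/*
 * The 128-bit case already returned 0 a few lines earlier (that key
 * size is handled in hardware), so dropping AES_KEYSIZE_128 from the
 * -EINVAL test changes nothing: only 192- and 256-bit keys reach this
 * point, and they are routed to the skcipher fallback.
 */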
skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -706,16 +706,20 @@ static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req) static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_encrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_encrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -724,16 +728,20 @@ static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req) static int sahara_aes_cbc_decrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -745,8 +753,9 @@ static int sahara_aes_cra_init(struct crypto_tfm *tfm) const char *name = crypto_tfm_alg_name(tfm); struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - ctx->fallback = crypto_alloc_ablkcipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + ctx->fallback = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback)) { pr_err("Error allocating fallback algo %s\n", name); return PTR_ERR(ctx->fallback); @@ -761,9 +770,7 @@ static void sahara_aes_cra_exit(struct crypto_tfm *tfm) { struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->fallback) - crypto_free_ablkcipher(ctx->fallback); - ctx->fallback = NULL; + crypto_free_skcipher(ctx->fallback); } static u32 sahara_sha_init_hdr(struct sahara_dev *dev, @@ -1180,15 +1187,6 @@ static int sahara_sha_import(struct ahash_request *req, const void *in) static int sahara_sha_cra_init(struct crypto_tfm *tfm) { - const char *name = crypto_tfm_alg_name(tfm); - struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - - ctx->shash_fallback = crypto_alloc_shash(name, 0, - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(ctx->shash_fallback)) { - pr_err("Error allocating fallback algo %s\n", name); - return PTR_ERR(ctx->shash_fallback); - } crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct sahara_sha_reqctx) + SHA_BUFFER_LEN + SHA256_BLOCK_SIZE); @@ -1196,14 +1194,6 @@ static int sahara_sha_cra_init(struct crypto_tfm *tfm) return 0; } -static void sahara_sha_cra_exit(struct crypto_tfm *tfm) -{ - struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - - crypto_free_shash(ctx->shash_fallback); - 
ctx->shash_fallback = NULL; -} - static struct crypto_alg aes_algs[] = { { .cra_name = "ecb(aes)", @@ -1272,7 +1262,6 @@ static struct ahash_alg sha_v3_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = sahara_sha_cra_init, - .cra_exit = sahara_sha_cra_exit, } }, }; @@ -1300,7 +1289,6 @@ static struct ahash_alg sha_v4_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = sahara_sha_cra_init, - .cra_exit = sahara_sha_cra_exit, } }, }; diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index b7ee8d30147d..0418a2f41dc0 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -91,10 +91,17 @@ static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr, return be16_to_cpu(ptr->len); } -static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr, bool is_sec1) +static void to_talitos_ptr_ext_set(struct talitos_ptr *ptr, u8 val, + bool is_sec1) { if (!is_sec1) - ptr->j_extent = 0; + ptr->j_extent = val; +} + +static void to_talitos_ptr_ext_or(struct talitos_ptr *ptr, u8 val, bool is_sec1) +{ + if (!is_sec1) + ptr->j_extent |= val; } /* @@ -111,7 +118,7 @@ static void map_single_talitos_ptr(struct device *dev, to_talitos_ptr_len(ptr, len, is_sec1); to_talitos_ptr(ptr, dma_addr, is_sec1); - to_talitos_ptr_extent_clear(ptr, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); } /* @@ -804,6 +811,11 @@ static void talitos_unregister_rng(struct device *dev) * crypto alg */ #define TALITOS_CRA_PRIORITY 3000 +/* + * Defines a priority for doing AEAD with descriptors type + * HMAC_SNOOP_NO_AFEA (HSNA) instead of type IPSEC_ESP + */ +#define TALITOS_CRA_PRIORITY_AEAD_HSNA (TALITOS_CRA_PRIORITY - 1) #define TALITOS_MAX_KEY_SIZE 96 #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ @@ -904,35 +916,59 @@ struct talitos_edesc { static void talitos_sg_unmap(struct device *dev, struct talitos_edesc *edesc, struct scatterlist *src, - struct scatterlist *dst) + struct scatterlist *dst, + unsigned int len, unsigned int offset) { + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); unsigned int src_nents = edesc->src_nents ? : 1; unsigned int dst_nents = edesc->dst_nents ? 
: 1; + if (is_sec1 && dst && dst_nents > 1) { + dma_sync_single_for_device(dev, edesc->dma_link_tbl + offset, + len, DMA_FROM_DEVICE); + sg_pcopy_from_buffer(dst, dst_nents, edesc->buf + offset, len, + offset); + } if (src != dst) { - dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); + if (src_nents == 1 || !is_sec1) + dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); - if (dst) { + if (dst && (dst_nents == 1 || !is_sec1)) dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE); - } - } else + } else if (src_nents == 1 || !is_sec1) { dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL); + } } static void ipsec_esp_unmap(struct device *dev, struct talitos_edesc *edesc, struct aead_request *areq) { - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], DMA_FROM_DEVICE); + struct crypto_aead *aead = crypto_aead_reqtfm(areq); + struct talitos_ctx *ctx = crypto_aead_ctx(aead); + unsigned int ivsize = crypto_aead_ivsize(aead); + + if (edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP) + unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], + DMA_FROM_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[3], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[0], DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); + talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->cryptlen, + areq->assoclen); if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); + + if (!(edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP)) { + unsigned int dst_nents = edesc->dst_nents ? : 1; + + sg_pcopy_to_buffer(areq->dst, dst_nents, ctx->iv, ivsize, + areq->assoclen + areq->cryptlen - ivsize); + } } /* @@ -942,6 +978,8 @@ static void ipsec_esp_encrypt_done(struct device *dev, struct talitos_desc *desc, void *context, int err) { + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); struct aead_request *areq = context; struct crypto_aead *authenc = crypto_aead_reqtfm(areq); unsigned int authsize = crypto_aead_authsize(authenc); @@ -955,8 +993,11 @@ static void ipsec_esp_encrypt_done(struct device *dev, /* copy the generated ICV to dst */ if (edesc->icv_ool) { - icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 2]; + if (is_sec1) + icvdata = edesc->buf + areq->assoclen + areq->cryptlen; + else + icvdata = &edesc->link_tbl[edesc->src_nents + + edesc->dst_nents + 2]; sg = sg_last(areq->dst, edesc->dst_nents); memcpy((char *)sg_virt(sg) + sg->length - authsize, icvdata, authsize); @@ -977,6 +1018,8 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, struct talitos_edesc *edesc; struct scatterlist *sg; char *oicv, *icv; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); edesc = container_of(desc, struct talitos_edesc, desc); @@ -988,7 +1031,12 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, icv = (char *)sg_virt(sg) + sg->length - authsize; if (edesc->dma_len) { - oicv = (char *)&edesc->link_tbl[edesc->src_nents + + if (is_sec1) + oicv = (char *)&edesc->dma_link_tbl + + req->assoclen + req->cryptlen; + else + oicv = (char *) + &edesc->link_tbl[edesc->src_nents + edesc->dst_nents + 2]; if (edesc->icv_ool) icv = oicv + authsize; @@ -1050,8 +1098,8 @@ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count, to_talitos_ptr(link_tbl_ptr + count, sg_dma_address(sg) + offset, 0); - link_tbl_ptr[count].len = cpu_to_be16(len); - link_tbl_ptr[count].j_extent = 0; + 
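
The talitos changes around this point make the SEC1 variant work without scatter/gather: multi-segment requests are bounced through the linear edesc->buf, and the out-of-line ICV sits at a fixed offset in it, while SEC2+ keeps the ICV after the link-table entries. A condensed sketch of where the completion paths above look for it (the helper is illustrative only, not a function in the driver):

	/* Sketch of the ICV placement used by ipsec_esp_encrypt_done()
	 * and the swauth decrypt path above. */
	static void *icv_location(struct talitos_edesc *edesc, bool is_sec1,
				  unsigned int assoclen, unsigned int cryptlen)
	{
		if (is_sec1)	/* linear bounce buffer: assoc|payload|ICV */
			return edesc->buf + assoclen + cryptlen;
		/* SEC2+: ICV follows the src/dst link-table entries */
		return &edesc->link_tbl[edesc->src_nents +
					edesc->dst_nents + 2];
	}
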
to_talitos_ptr_len(link_tbl_ptr + count, len, 0); + to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0); count++; cryptlen -= len; offset = 0; @@ -1062,17 +1110,43 @@ next: /* tag end of link table */ if (count > 0) - link_tbl_ptr[count - 1].j_extent = DESC_PTR_LNKTBL_RETURN; + to_talitos_ptr_ext_set(link_tbl_ptr + count - 1, + DESC_PTR_LNKTBL_RETURN, 0); return count; } -static inline int sg_to_link_tbl(struct scatterlist *sg, int sg_count, - int cryptlen, - struct talitos_ptr *link_tbl_ptr) +int talitos_sg_map(struct device *dev, struct scatterlist *src, + unsigned int len, struct talitos_edesc *edesc, + struct talitos_ptr *ptr, + int sg_count, unsigned int offset, int tbl_off) { - return sg_to_link_tbl_offset(sg, sg_count, 0, cryptlen, - link_tbl_ptr); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + to_talitos_ptr_len(ptr, len, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(src) + offset, is_sec1); + return sg_count; + } + if (is_sec1) { + to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, is_sec1); + return sg_count; + } + sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len, + &edesc->link_tbl[tbl_off]); + if (sg_count == 1) { + /* Only one segment now, so no link tbl needed*/ + copy_talitos_ptr(ptr, &edesc->link_tbl[tbl_off], is_sec1); + return sg_count; + } + to_talitos_ptr(ptr, edesc->dma_link_tbl + + tbl_off * sizeof(struct talitos_ptr), is_sec1); + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, is_sec1); + + return sg_count; } /* @@ -1093,42 +1167,52 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, int tbl_off = 0; int sg_count, ret; int sg_link_tbl_len; + bool sync_needed = false; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); /* hmac key */ map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key, DMA_TO_DEVICE); - sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ?: 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE); - /* hmac data */ - desc->ptr[1].len = cpu_to_be16(areq->assoclen); - if (sg_count > 1 && - (ret = sg_to_link_tbl_offset(areq->src, sg_count, 0, - areq->assoclen, - &edesc->link_tbl[tbl_off])) > 1) { - to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * - sizeof(struct talitos_ptr), 0); - desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP; + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, + areq->assoclen + cryptlen); + else + sg_count = dma_map_sg(dev, areq->src, sg_count, + (areq->src == areq->dst) ? 
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); + /* hmac data */ + ret = talitos_sg_map(dev, areq->src, areq->assoclen, edesc, + &desc->ptr[1], sg_count, 0, tbl_off); + if (ret > 1) { tbl_off += ret; - } else { - to_talitos_ptr(&desc->ptr[1], sg_dma_address(areq->src), 0); - desc->ptr[1].j_extent = 0; + sync_needed = true; } /* cipher iv */ - to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, 0); - desc->ptr[2].len = cpu_to_be16(ivsize); - desc->ptr[2].j_extent = 0; + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, is_sec1); + to_talitos_ptr_len(&desc->ptr[2], ivsize, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[2], 0, is_sec1); + } else { + to_talitos_ptr(&desc->ptr[3], edesc->iv_dma, is_sec1); + to_talitos_ptr_len(&desc->ptr[3], ivsize, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[3], 0, is_sec1); + } /* cipher key */ - map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, - (char *)&ctx->key + ctx->authkeylen, - DMA_TO_DEVICE); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, + (char *)&ctx->key + ctx->authkeylen, + DMA_TO_DEVICE); + else + map_single_talitos_ptr(dev, &desc->ptr[2], ctx->enckeylen, + (char *)&ctx->key + ctx->authkeylen, + DMA_TO_DEVICE); /* * cipher in @@ -1136,78 +1220,82 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, * extent is bytes of HMAC postpended to ciphertext, * typically 12 for ipsec */ - desc->ptr[4].len = cpu_to_be16(cryptlen); - desc->ptr[4].j_extent = authsize; + to_talitos_ptr_len(&desc->ptr[4], cryptlen, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[4], 0, is_sec1); sg_link_tbl_len = cryptlen; - if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) - sg_link_tbl_len += authsize; - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src) + - areq->assoclen, 0); - } else if ((ret = sg_to_link_tbl_offset(areq->src, sg_count, - areq->assoclen, sg_link_tbl_len, - &edesc->link_tbl[tbl_off])) > - 1) { - desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; - to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl + - tbl_off * - sizeof(struct talitos_ptr), 0); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - tbl_off += ret; - } else { - copy_talitos_ptr(&desc->ptr[4], &edesc->link_tbl[tbl_off], 0); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + to_talitos_ptr_ext_set(&desc->ptr[4], authsize, is_sec1); + + if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) + sg_link_tbl_len += authsize; } - /* cipher out */ - desc->ptr[5].len = cpu_to_be16(cryptlen); - desc->ptr[5].j_extent = authsize; + sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, + &desc->ptr[4], sg_count, areq->assoclen, + tbl_off); - if (areq->src != areq->dst) - sg_count = dma_map_sg(dev, areq->dst, edesc->dst_nents ? : 1, - DMA_FROM_DEVICE); + if (sg_count > 1) { + tbl_off += sg_count; + sync_needed = true; + } - edesc->icv_ool = false; + /* cipher out */ + if (areq->src != areq->dst) { + sg_count = edesc->dst_nents ? 
: 1; + if (!is_sec1 || sg_count == 1) + dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE); + } - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst) + - areq->assoclen, 0); - } else if ((sg_count = - sg_to_link_tbl_offset(areq->dst, sg_count, - areq->assoclen, cryptlen, - &edesc->link_tbl[tbl_off])) > 1) { - struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; - - to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl + - tbl_off * sizeof(struct talitos_ptr), 0); - - /* Add an entry to the link table for ICV data */ - tbl_ptr += sg_count - 1; - tbl_ptr->j_extent = 0; - tbl_ptr++; - tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; - tbl_ptr->len = cpu_to_be16(authsize); - - /* icv data follows link tables */ - to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + - (edesc->src_nents + edesc->dst_nents + - 2) * sizeof(struct talitos_ptr) + - authsize, 0); - desc->ptr[5].j_extent |= DESC_PTR_LNKTBL_JUMP; - dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); + sg_count = talitos_sg_map(dev, areq->dst, cryptlen, edesc, + &desc->ptr[5], sg_count, areq->assoclen, + tbl_off); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + to_talitos_ptr_ext_or(&desc->ptr[5], authsize, is_sec1); + + if (sg_count > 1) { edesc->icv_ool = true; + sync_needed = true; + + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; + int offset = (edesc->src_nents + edesc->dst_nents + 2) * + sizeof(struct talitos_ptr) + authsize; + + /* Add an entry to the link table for ICV data */ + tbl_ptr += sg_count - 1; + to_talitos_ptr_ext_set(tbl_ptr, 0, is_sec1); + tbl_ptr++; + to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN, + is_sec1); + to_talitos_ptr_len(tbl_ptr, authsize, is_sec1); + + /* icv data follows link tables */ + to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + offset, + is_sec1); + } } else { - copy_talitos_ptr(&desc->ptr[5], &edesc->link_tbl[tbl_off], 0); + edesc->icv_ool = false; + } + + /* ICV data */ + if (!(desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)) { + to_talitos_ptr_len(&desc->ptr[6], authsize, is_sec1); + to_talitos_ptr(&desc->ptr[6], edesc->dma_link_tbl + + areq->assoclen + cryptlen, is_sec1); } /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, - DMA_FROM_DEVICE); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, + DMA_FROM_DEVICE); + + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { @@ -1233,7 +1321,7 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, bool encrypt) { struct talitos_edesc *edesc; - int src_nents, dst_nents, alloc_len, dma_len; + int src_nents, dst_nents, alloc_len, dma_len, src_len, dst_len; dma_addr_t iv_dma = 0; gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; @@ -1251,8 +1339,8 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE); if (!dst || dst == src) { - src_nents = sg_nents_for_len(src, - assoclen + cryptlen + authsize); + src_len = assoclen + cryptlen + authsize; + src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); err = ERR_PTR(-EINVAL); @@ -1260,17 +1348,18 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, } src_nents = (src_nents == 1) ? 
0 : src_nents; dst_nents = dst ? src_nents : 0; + dst_len = 0; } else { /* dst && dst != src*/ - src_nents = sg_nents_for_len(src, assoclen + cryptlen + - (encrypt ? 0 : authsize)); + src_len = assoclen + cryptlen + (encrypt ? 0 : authsize); + src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); err = ERR_PTR(-EINVAL); goto error_sg; } src_nents = (src_nents == 1) ? 0 : src_nents; - dst_nents = sg_nents_for_len(dst, assoclen + cryptlen + - (encrypt ? authsize : 0)); + dst_len = assoclen + cryptlen + (encrypt ? authsize : 0); + dst_nents = sg_nents_for_len(dst, dst_len); if (dst_nents < 0) { dev_err(dev, "Invalid number of dst SG.\n"); err = ERR_PTR(-EINVAL); @@ -1287,8 +1376,8 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, alloc_len = sizeof(struct talitos_edesc); if (src_nents || dst_nents) { if (is_sec1) - dma_len = (src_nents ? cryptlen : 0) + - (dst_nents ? cryptlen : 0); + dma_len = (src_nents ? src_len : 0) + + (dst_nents ? dst_len : 0); else dma_len = (src_nents + dst_nents + 2) * sizeof(struct talitos_ptr) + authsize * 2; @@ -1412,40 +1501,13 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, return 0; } -static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, - struct scatterlist *dst, unsigned int len, - struct talitos_edesc *edesc) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (is_sec1) { - if (!edesc->src_nents) { - dma_unmap_sg(dev, src, 1, - dst != src ? DMA_TO_DEVICE - : DMA_BIDIRECTIONAL); - } - if (dst && edesc->dst_nents) { - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - sg_copy_from_buffer(dst, edesc->dst_nents ? : 1, - edesc->buf + len, len); - } else if (dst && dst != src) { - dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); - } - } else { - talitos_sg_unmap(dev, edesc, src, dst); - } -} - static void common_nonsnoop_unmap(struct device *dev, struct talitos_edesc *edesc, struct ablkcipher_request *areq) { unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); - unmap_sg_talitos_ptr(dev, areq->src, areq->dst, areq->nbytes, edesc); + talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->nbytes, 0); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); @@ -1470,100 +1532,6 @@ static void ablkcipher_done(struct device *dev, areq->base.complete(&areq->base, err); } -int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, struct talitos_ptr *ptr) -{ - int sg_count; - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - to_talitos_ptr_len(ptr, len, is_sec1); - - if (is_sec1) { - sg_count = edesc->src_nents ? : 1; - - if (sg_count == 1) { - dma_map_sg(dev, src, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_copy_to_buffer(src, sg_count, edesc->buf, len); - to_talitos_ptr(ptr, edesc->dma_link_tbl, is_sec1); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - len, DMA_TO_DEVICE); - } - } else { - to_talitos_ptr_extent_clear(ptr, is_sec1); - - sg_count = dma_map_sg(dev, src, edesc->src_nents ? 
: 1, dir); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_count = sg_to_link_tbl(src, sg_count, len, - &edesc->link_tbl[0]); - if (sg_count > 1) { - to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); - ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; - dma_sync_single_for_device(dev, - edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed*/ - to_talitos_ptr(ptr, sg_dma_address(src), - is_sec1); - } - } - } - return sg_count; -} - -void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, - struct talitos_ptr *ptr, int sg_count) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (dir != DMA_NONE) - sg_count = dma_map_sg(dev, dst, edesc->dst_nents ? : 1, dir); - - to_talitos_ptr_len(ptr, len, is_sec1); - - if (is_sec1) { - if (sg_count == 1) { - if (dir != DMA_NONE) - dma_map_sg(dev, dst, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - to_talitos_ptr(ptr, edesc->dma_link_tbl + len, is_sec1); - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - } - } else { - to_talitos_ptr_extent_clear(ptr, is_sec1); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents + 1]; - - to_talitos_ptr(ptr, edesc->dma_link_tbl + - (edesc->src_nents + 1) * - sizeof(struct talitos_ptr), 0); - ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; - sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } - } -} - static int common_nonsnoop(struct talitos_edesc *edesc, struct ablkcipher_request *areq, void (*callback) (struct device *dev, @@ -1577,6 +1545,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, unsigned int cryptlen = areq->nbytes; unsigned int ivsize = crypto_ablkcipher_ivsize(cipher); int sg_count, ret; + bool sync_needed = false; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); @@ -1586,25 +1555,39 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* cipher iv */ to_talitos_ptr(&desc->ptr[1], edesc->iv_dma, is_sec1); to_talitos_ptr_len(&desc->ptr[1], ivsize, is_sec1); - to_talitos_ptr_extent_clear(&desc->ptr[1], is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[1], 0, is_sec1); /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, (char *)&ctx->key, DMA_TO_DEVICE); + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, + cryptlen); + else + sg_count = dma_map_sg(dev, areq->src, sg_count, + (areq->src == areq->dst) ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE); /* * cipher in */ - sg_count = map_sg_in_talitos_ptr(dev, areq->src, cryptlen, edesc, - (areq->src == areq->dst) ? - DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - &desc->ptr[3]); + sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, + &desc->ptr[3], sg_count, 0, 0); + if (sg_count > 1) + sync_needed = true; /* cipher out */ - map_sg_out_talitos_ptr(dev, areq->dst, cryptlen, edesc, - (areq->src == areq->dst) ? DMA_NONE - : DMA_FROM_DEVICE, - &desc->ptr[4], sg_count); + if (areq->src != areq->dst) { + sg_count = edesc->dst_nents ? 
: 1; + if (!is_sec1 || sg_count == 1) + dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE); + } + + ret = talitos_sg_map(dev, areq->dst, cryptlen, edesc, &desc->ptr[4], + sg_count, 0, (edesc->src_nents + 1)); + if (ret > 1) + sync_needed = true; /* iv out */ map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, @@ -1613,6 +1596,10 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* last DWORD empty */ desc->ptr[6] = zero_entry; + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_unmap(dev, edesc, areq); @@ -1676,7 +1663,7 @@ static void common_nonsnoop_hash_unmap(struct device *dev, unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); - unmap_sg_talitos_ptr(dev, req_ctx->psrc, NULL, 0, edesc); + talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0); /* When using hashctx-in, must unmap it. */ if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1)) @@ -1747,8 +1734,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, struct device *dev = ctx->dev; struct talitos_desc *desc = &edesc->desc; int ret; + bool sync_needed = false; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); + int sg_count; /* first DWORD empty */ desc->ptr[0] = zero_entry; @@ -1773,11 +1762,19 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, else desc->ptr[2] = zero_entry; + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length); + else + sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count, + DMA_TO_DEVICE); /* * data in */ - map_sg_in_talitos_ptr(dev, req_ctx->psrc, length, edesc, - DMA_TO_DEVICE, &desc->ptr[3]); + sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc, + &desc->ptr[3], sg_count, 0, 0); + if (sg_count > 1) + sync_needed = true; /* fifth DWORD empty */ desc->ptr[4] = zero_entry; @@ -1798,6 +1795,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0) talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]); + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_hash_unmap(dev, edesc, areq); @@ -2124,6 +2125,7 @@ static int ahash_setkey(struct crypto_ahash *tfm, const u8 *key, struct talitos_alg_template { u32 type; + u32 priority; union { struct crypto_alg crypto; struct ahash_alg hash; @@ -2154,6 +2156,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha1-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA1_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2176,6 +2199,29 @@ static struct talitos_alg_template driver_algs[] = { 
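
The driver_algs[] entries added in this and the following hunks register every authenc() template twice: once with the IPSEC_ESP descriptor type at TALITOS_CRA_PRIORITY, and once with HMAC_SNOOP_NO_AFEU at TALITOS_CRA_PRIORITY_AEAD_HSNA, one step lower, so the ESP form wins wherever both load. A hedged usage sketch of the effect (the function name is illustrative):

	#include <crypto/aead.h>

	/* Illustrative: the crypto core resolves a template name to the
	 * registered implementation with the highest cra_priority, i.e.
	 * the IPSEC_ESP descriptor variant when it is available. */
	static int show_selected_aead(void)
	{
		struct crypto_aead *tfm;

		tfm = crypto_alloc_aead("authenc(hmac(sha1),cbc(aes))", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);
		pr_info("selected %s\n",
			crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm)));
		crypto_free_aead(tfm);
		return 0;
	}
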
DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha1-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA1_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2196,6 +2242,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA224_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha224-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA224_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2218,6 +2285,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA224_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha224-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA224_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2238,6 +2328,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha256-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA256_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2260,6 +2371,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha256-" + "cbc-3des-talitos", + .cra_blocksize = 
DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA256_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2364,6 +2498,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_MD5_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(aes))", + .cra_driver_name = "authenc-hmac-md5-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_MD5_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2385,6 +2540,28 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_MD5_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-md5-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_MD5_HMAC, + }, /* ABLKCIPHER algorithms. 
*/ { .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .alg.crypto = { @@ -2901,7 +3078,10 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, } alg->cra_module = THIS_MODULE; - alg->cra_priority = TALITOS_CRA_PRIORITY; + if (t_alg->algt.priority) + alg->cra_priority = t_alg->algt.priority; + else + alg->cra_priority = TALITOS_CRA_PRIORITY; alg->cra_alignmask = 0; alg->cra_ctxsize = sizeof(struct talitos_ctx); alg->cra_flags |= CRYPTO_ALG_KERN_DRIVER_ONLY; diff --git a/drivers/crypto/ux500/cryp/Makefile b/drivers/crypto/ux500/cryp/Makefile index e5d362a6f680..b497ae3dde07 100644 --- a/drivers/crypto/ux500/cryp/Makefile +++ b/drivers/crypto/ux500/cryp/Makefile @@ -4,9 +4,9 @@ # * License terms: GNU General Public License (GPL) version 2 */ ifdef CONFIG_CRYPTO_DEV_UX500_DEBUG -CFLAGS_cryp_core.o := -DDEBUG -O0 -CFLAGS_cryp.o := -DDEBUG -O0 -CFLAGS_cryp_irq.o := -DDEBUG -O0 +CFLAGS_cryp_core.o := -DDEBUG +CFLAGS_cryp.o := -DDEBUG +CFLAGS_cryp_irq.o := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_UX500_CRYP) += ux500_cryp.o diff --git a/drivers/crypto/ux500/hash/Makefile b/drivers/crypto/ux500/hash/Makefile index b2f90d9bac72..784d9c0a8853 100644 --- a/drivers/crypto/ux500/hash/Makefile +++ b/drivers/crypto/ux500/hash/Makefile @@ -4,7 +4,7 @@ # License terms: GNU General Public License (GPL) version 2 # ifdef CONFIG_CRYPTO_DEV_UX500_DEBUG -CFLAGS_hash_core.o := -DDEBUG -O0 +CFLAGS_hash_core.o := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_UX500_HASH) += ux500_hash.o diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 574e87c7f2b8..9acccad26928 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -781,7 +781,7 @@ static int hash_process_data(struct hash_device_data *device_data, &device_data->state); memmove(req_ctx->state.buffer, device_data->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_resume_state() failed!\n", @@ -832,7 +832,7 @@ static int hash_process_data(struct hash_device_data *device_data, memmove(device_data->state.buffer, req_ctx->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_save_state() failed!\n", __func__); diff --git a/drivers/crypto/vmx/.gitignore b/drivers/crypto/vmx/.gitignore new file mode 100644 index 000000000000..af4a7ce4738d --- /dev/null +++ b/drivers/crypto/vmx/.gitignore @@ -0,0 +1,2 @@ +aesp8-ppc.S +ghashp8-ppc.S diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index d28ab96a2475..de6e241b0866 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o -vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o ghash.o +vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) TARGET := linux-ppc64le diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 495577b6d31b..94ad5c0adbcb 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = { .cra_name = "cbc(aes)", .cra_driver_name = "p8_aes_cbc", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = &crypto_blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/crypto/vmx/aes_ctr.c 
b/drivers/crypto/vmx/aes_ctr.c index 0a3c1b04cf3c..38ed10d761d0 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = { .cra_name = "ctr(aes)", .cra_driver_name = "p8_aes_ctr", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = &crypto_blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c new file mode 100644 index 000000000000..cfb25413917c --- /dev/null +++ b/drivers/crypto/vmx/aes_xts.c @@ -0,0 +1,190 @@ +/** + * AES XTS routines supporting VMX In-core instructions on Power 8 + * + * Copyright (C) 2015 International Business Machines Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 only. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Leonidas S. Barbosa + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "aesp8-ppc.h" + +struct p8_aes_xts_ctx { + struct crypto_blkcipher *fallback; + struct aes_key enc_key; + struct aes_key dec_key; + struct aes_key tweak_key; +}; + +static int p8_aes_xts_init(struct crypto_tfm *tfm) +{ + const char *alg; + struct crypto_blkcipher *fallback; + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = + crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR + "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + + crypto_blkcipher_set_flags( + fallback, + crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + ctx->fallback = fallback; + + return 0; +} + +static void p8_aes_xts_exit(struct crypto_tfm *tfm) +{ + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback) { + crypto_free_blkcipher(ctx->fallback); + ctx->fallback = NULL; + } +} + +static int p8_aes_xts_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + int ret; + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + ret = xts_check_key(tfm, key, keylen); + if (ret) + return ret; + + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); + ret = aes_p8_set_encrypt_key(key + keylen/2, (keylen/2) * 8, &ctx->tweak_key); + ret += aes_p8_set_encrypt_key(key, (keylen/2) * 8, &ctx->enc_key); + ret += aes_p8_set_decrypt_key(key, (keylen/2) * 8, &ctx->dec_key); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + + ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + return ret; +} + +static int p8_aes_xts_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes, int
enc) +{ + int ret; + u8 tweak[AES_BLOCK_SIZE]; + u8 *iv; + struct blkcipher_walk walk; + struct p8_aes_xts_ctx *ctx = + crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); + struct blkcipher_desc fallback_desc = { + .tfm = ctx->fallback, + .info = desc->info, + .flags = desc->flags + }; + + if (in_interrupt()) { + ret = enc ? crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes) : + crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); + } else { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); + + blkcipher_walk_init(&walk, dst, src, nbytes); + + iv = (u8 *)walk.iv; + ret = blkcipher_walk_virt(desc, &walk); + memset(tweak, 0, AES_BLOCK_SIZE); + aes_p8_encrypt(iv, tweak, &ctx->tweak_key); + + while ((nbytes = walk.nbytes)) { + if (enc) + aes_p8_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, &ctx->enc_key, NULL, tweak); + else + aes_p8_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, &ctx->dec_key, NULL, tweak); + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, &walk, nbytes); + } + + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + } + return ret; +} + +static int p8_aes_xts_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return p8_aes_xts_crypt(desc, dst, src, nbytes, 1); +} + +static int p8_aes_xts_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return p8_aes_xts_crypt(desc, dst, src, nbytes, 0); +} + +struct crypto_alg p8_aes_xts_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "p8_aes_xts", + .cra_module = THIS_MODULE, + .cra_priority = 2000, + .cra_type = &crypto_blkcipher_type, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, + .cra_alignmask = 0, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct p8_aes_xts_ctx), + .cra_init = p8_aes_xts_init, + .cra_exit = p8_aes_xts_exit, + .cra_blkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = p8_aes_xts_setkey, + .encrypt = p8_aes_xts_encrypt, + .decrypt = p8_aes_xts_decrypt, + } +}; diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h index 4cd34ee54a94..01972e16a6c0 100644 --- a/drivers/crypto/vmx/aesp8-ppc.h +++ b/drivers/crypto/vmx/aesp8-ppc.h @@ -19,3 +19,7 @@ void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len, void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, size_t len, const struct aes_key *key, const u8 *iv); +void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len, + const struct aes_key *key1, const struct aes_key *key2, u8 *iv); +void aes_p8_xts_decrypt(const u8 *in, u8 *out, size_t len, + const struct aes_key *key1, const struct aes_key *key2, u8 *iv); diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl index 228053921b3f..0b4a293b8a1e 100644 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ b/drivers/crypto/vmx/aesp8-ppc.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. 
You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -20,6 +27,19 @@ # instructions are interleaved. It's reckoned that eventual # misalignment penalties at page boundaries are on average lower # than the additional overhead of a pure AltiVec approach. +# +# May 2016 +# +# Add XTS subroutine; 9x improvement on little- and 12x on big-endian +# systems was measured. +# +###################################################################### +# Current large-block performance in cycles per byte processed with +# 128-bit key (less is better). +# +# CBC en-/decrypt CTR XTS +# POWER8[le] 3.96/0.72 0.74 1.1 +# POWER8[be] 3.75/0.65 0.66 1.0 $flavour = shift; @@ -1875,6 +1895,1845 @@ Lctr32_enc8x_done: ___ }} }}} +######################################################################### +{{{ # XTS procedures # +# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # +# const AES_KEY *key1, const AES_KEY *key2, # +# [const] unsigned char iv[16]); # +# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # +# input tweak value is assumed to be encrypted already, and last tweak # +# value, one suitable for consecutive call on same chunk of data, is # +# written back to original buffer. In addition, in "tweak chaining" # +# mode only complete input blocks are processed. # + +my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); +my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); +my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); +my $taillen = $key2; + + ($inp,$idx) = ($idx,$inp); # reassign + +$code.=<<___; +.globl .${prefix}_xts_encrypt + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff0 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not a typo + le?vxor $inpperm,$inpperm,$tmp + + ${UCMP}i $key2,0 # key2==NULL?
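
Throughout these XTS routines, the recurring vsrab/vaddubm/vsldoi/vand/vxor group (marked "next tweak value" below) multiplies the 128-bit tweak by x in GF(2^128); the 0x870101..01 constant built just below encodes the XTS feedback byte 0x87 plus the cross-byte carries. A plain-C model of that step, equivalent in effect to the kernel's gf128mul_x_ble() on a little-endian 128-bit value (model only, not the vector code):

	/* t[0] is the least significant byte, per IEEE P1619 XTS. */
	static void xts_mul_x(unsigned char t[16])
	{
		unsigned char carry = 0;
		int i;

		for (i = 0; i < 16; i++) {
			unsigned char msb = t[i] >> 7;

			t[i] = (unsigned char)(t[i] << 1) | carry;
			carry = msb;
		}
		if (carry)	/* reduce by x^128 + x^7 + x^2 + x + 1 */
			t[0] ^= 0x87;
	}
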
+ beq Lxts_enc_no_key2 + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + li $ivp,0 # don't chain the tweak + b Lxts_enc + +Lxts_enc_no_key2: + li $idx,-16 + and $len,$len,$idx # in "tweak chaining" + # mode only complete + # blocks are processed +Lxts_enc: + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_encrypt6x + + andi. $taillen,$len,15 + subic r0,$len,32 + subi $taillen,$taillen,16 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + b Loop_xts_enc + +.align 5 +Loop_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vcipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. 
$len,$len,16 + beq Lxts_enc_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + subic r0,$len,32 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $output,$output,$rndkey0 # just in case $len<16 + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_enc + + vxor $output,$output,$tweak + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + subi r11,$out,17 + subi $out,$out,16 + mtctr $len + li $len,16 +Loop_xts_enc_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_enc_steal + + mtctr $rounds + b Loop_xts_enc # one more time... + +Lxts_enc_done: + ${UCMP}i $ivp,0 + beq Lxts_enc_ret + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_enc_ret: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt + +.globl .${prefix}_xts_decrypt + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff8 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + andi. r0,$len,15 + neg r0,r0 + andi. r0,r0,16 + sub $len,$len,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ${UCMP}i $key2,0 # key2==NULL? + beq Lxts_dec_no_key2 + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + li $ivp,0 # don't chain the tweak + b Lxts_dec + +Lxts_dec_no_key2: + neg $idx,$len + andi. 
$idx,$idx,15 + add $len,$len,$idx # in "tweak chaining" + # mode only complete + # blocks are processed +Lxts_dec: + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_decrypt6x + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + + ${UCMP}i $len,16 + blt Ltail_xts_dec + be?b Loop_xts_dec + +.align 5 +Loop_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. $len,$len,16 + beq Lxts_dec_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_dec + +Ltail_xts_dec: + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak1,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak1,$tweak1,$tmp + + subi $inp,$inp,16 + add $inp,$inp,$len + + vxor $inout,$inout,$tweak # :-( + vxor $inout,$inout,$tweak1 # :-) + +Loop_xts_dec_short: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec_short + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak1 + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + + vmr $inout,$inptail + lvx $inptail,0,$inp + #addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + vxor $rndkey0,$rndkey0,$tweak + vxor $inout,$inout,$rndkey0 + lvx 
$rndkey0,$idx,$key1 + addi $idx,$idx,16 + + subi r11,$out,1 + mtctr $len + li $len,16 +Loop_xts_dec_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_dec_steal + + mtctr $rounds + b Loop_xts_dec # one more time... + +Lxts_dec_done: + ${UCMP}i $ivp,0 + beq Lxts_dec_ret + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_dec_ret: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt +___ +######################################################################### +{{ # Optimized XTS procedures # +my $key_=$key2; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); +my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); +my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($keyperm)=($out0); # aliases with "caller", redundant assignment +my $taillen=$x70; + +$code.=<<___; +.align 5 +_aesp8_xts_encrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r11 + li r7,`$FRAME+8*16+15` + li r3,`$FRAME+8*16+31` + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r3,$sp + addi r3,r3,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r3,$sp + addi r3,r3,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r3,$sp + addi r3,r3,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r3,$sp + addi r3,r3,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r3,$sp + addi r3,r3,32 + stvx v30,r7,$sp + stvx v31,r3,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_xts_enc_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_enc_key + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor 
$twk0,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. $taillen,$len,15 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 + mtctr $rounds + b Loop_xts_enc6x + +.align 5 +Loop_xts_enc6x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc6x + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + + subfe. 
r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + + and r0,r0,$len + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out0,$out0,v26 + vcipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v26 + vcipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out0,$out0,v27 + vcipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vcipher $out4,$out4,v27 + vcipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v28 + vcipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vcipher $out0,$out0,v29 + vcipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vcipher $out4,$out4,v29 + vcipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vcipher $out0,$out0,v30 + vcipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v30 + vcipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vcipherlast $out0,$out0,$in0 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vcipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vcipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vcipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vcipherlast $tmp,$out5,$in5 # last block might be needed + # in stealing mode + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$tmp,$tmp,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + le?stvx_u $out5,$x50,$out + be?stvx_u $tmp, $x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr 
$rounds + beq Loop_xts_enc6x # did $len-=96 borrow? + + addic. $len,$len,0x60 + beq Lxts_enc6x_zero + cmpwi $len,0x20 + blt Lxts_enc6x_one + nop + beq Lxts_enc6x_two + cmpwi $len,0x40 + blt Lxts_enc6x_three + nop + beq Lxts_enc6x_four + +Lxts_enc6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $tmp,$out4,$twk5 # last block prep for stealing + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $tmp,$out3,$twk4 # last block prep for stealing + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $tmp,$out2,$twk3 # last block prep for stealing + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vxor $tmp,$out1,$twk2 # last block prep for stealing + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_enc1x: + vcipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc1x + + add $inp,$inp,$taillen + cmpwi $taillen,0 + vcipher $out0,$out0,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + + lvsr $inpperm,0,$taillen + vcipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vcipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vcipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vcipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vcipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vxor $tmp,$out0,$twk1 # last block prep for stealing + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_zero: + cmpwi 
$taillen,0 + beq Lxts_enc6x_done + + add $inp,$inp,$taillen + subi $inp,$inp,16 + lvx_u $in0,0,$inp + lvsr $inpperm,0,$taillen # $in5 is no more + le?vperm $in0,$in0,$in0,$leperm + vperm $in0,$in0,$in0,$inpperm + vxor $tmp,$tmp,$twk0 +Lxts_enc6x_steal: + vxor $in0,$in0,$twk0 + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? + + subi r30,$out,17 + subi $out,$out,16 + mtctr $taillen +Loop_xts_enc6x_steal: + lbzu r0,1(r30) + stb r0,16(r30) + bdnz Loop_xts_enc6x_steal + + li $taillen,0 + mtctr $rounds + b Loop_xts_enc1x # one more time... + +.align 4 +Lxts_enc6x_done: + ${UCMP}i $ivp,0 + beq Lxts_enc6x_ret + + vxor $tweak,$twk0,$rndkey0 + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_enc6x_ret: + mtlr r11 + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_enc5x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_enc5x + + add $inp,$inp,$taillen + cmpwi $taillen,0 + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + vcipher $out0,$out0,v26 + lvsr $inpperm,r0,$taillen # $in5 is no more + vcipher $out1,$out1,v26 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vcipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vcipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vcipher $out1,$out1,v27 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vcipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vcipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vcipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vcipher $out1,$out1,v29 + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vcipher $out4,$out4,v29 + lvx v25,$x10,$key_ # 
re-pre-load round[2] + vxor $in4,$twk4,v31 + + vcipher $out0,$out0,v30 + vperm $in0,$in0,$in0,$inpperm + vcipher $out1,$out1,v30 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vcipher $out4,$out4,v30 + + vcipherlast $out0,$out0,$twk0 + vcipherlast $out1,$out1,$in1 + vcipherlast $out2,$out2,$in2 + vcipherlast $out3,$out3,$in3 + vcipherlast $out4,$out4,$in4 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +.align 5 +_aesp8_xts_decrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r11 + li r7,`$FRAME+8*16+15` + li r3,`$FRAME+8*16+31` + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r3,$sp + addi r3,r3,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r3,$sp + addi r3,r3,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r3,$sp + addi r3,r3,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r3,$sp + addi r3,r3,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r3,$sp + addi r3,r3,32 + stvx v30,r7,$sp + stvx v31,r3,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_xts_dec_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_dec_key + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor $twk0,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. 
$taillen,$len,15 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 + mtctr $rounds + b Loop_xts_dec6x + +.align 5 +Loop_xts_dec6x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec6x + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + + subfe. 
r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + + and r0,r0,$len + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out0,$out0,v27 + vncipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vncipher $out0,$out0,v29 + vncipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v30 + vncipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vncipherlast $out0,$out0,$in0 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vncipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vncipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vncipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vncipherlast $out5,$out5,$in5 + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$out5,$out5,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + stvx_u $out5,$x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr $rounds + beq Loop_xts_dec6x # did 
$len-=96 borrow? + + addic. $len,$len,0x60 + beq Lxts_dec6x_zero + cmpwi $len,0x20 + blt Lxts_dec6x_one + nop + beq Lxts_dec6x_two + cmpwi $len,0x40 + blt Lxts_dec6x_three + nop + beq Lxts_dec6x_four + +Lxts_dec6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + vxor $twk1,$tweak,$rndkey0 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk1 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + vmr $twk1,$twk5 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk5 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + vmr $twk1,$twk4 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk4 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vmr $twk1,$twk3 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk3 + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_dec1x: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec1x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + + andi. 
r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + mtctr $rounds + vncipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vmr $twk1,$twk2 + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + vxor $out0,$in0,$twk2 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_zero: + cmpwi $taillen,0 + beq Lxts_dec6x_done + + lvx_u $in0,0,$inp + le?vperm $in0,$in0,$in0,$leperm + vxor $out0,$in0,$twk1 +Lxts_dec6x_steal: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Lxts_dec6x_steal + + add $inp,$inp,$taillen + vncipher $out0,$out0,v24 + + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v26 + + lvsr $inpperm,0,$taillen # $in5 is no more + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk1,$twk1,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vncipherlast $tmp,$out0,$twk1 + + le?vperm $out0,$tmp,$tmp,$leperm + le?stvx_u $out0,0,$out + be?stvx_u $tmp,0,$out + + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 + vxor $out0,$out0,$twk0 + + subi r30,$out,1 + mtctr $taillen +Loop_xts_dec6x_steal: + lbzu r0,1(r30) + stb r0,16(r30) + bdnz Loop_xts_dec6x_steal + + li $taillen,0 + mtctr $rounds + b Loop_xts_dec1x # one more time... 
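The five-instruction pattern that recurs throughout these XTS routines (vsrab, vaddubm, vsldoi, vand, vxor against the 0x870101..01 constant) computes the next tweak value: multiplication by x in GF(2^128) with reduction polynomial x^128 + x^7 + x^2 + x + 1, done bytewise so all sixteen lanes shift in a single step. The lbzu/stb loop just above is the byte copy for ciphertext stealing. A minimal scalar sketch of the same tweak update in C, with an illustrative function name that is not part of this patch:

#include <stdint.h>

/* Multiply the 128-bit XTS tweak by x in GF(2^128), reduction
 * polynomial x^128 + x^7 + x^2 + x + 1. Bytes are in IEEE P1619
 * little-endian order, matching what the vector code computes in
 * parallel across all 16 byte lanes. */
void xts_gf_mul_x(uint8_t t[16])
{
	uint8_t carry = 0;
	int i;

	for (i = 0; i < 16; i++) {
		uint8_t msb = t[i] >> 7;	/* bit shifted out of this byte */

		t[i] = (uint8_t)((t[i] << 1) | carry);
		carry = msb;
	}
	if (carry)	/* x^128 folds back to x^7 + x^2 + x + 1 */
		t[0] ^= 0x87;
}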
+ +.align 4 +Lxts_dec6x_done: + ${UCMP}i $ivp,0 + beq Lxts_dec6x_ret + + vxor $tweak,$twk0,$rndkey0 + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_dec6x_ret: + mtlr r11 + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_dec5x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_dec5x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + + andi. 
r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vncipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vncipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $in4,$twk4,v31 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vncipher $out4,$out4,v30 + + vncipherlast $out0,$out0,$twk0 + vncipherlast $out1,$out1,$in1 + vncipherlast $out2,$out2,$in2 + vncipherlast $out3,$out3,$in3 + vncipherlast $out4,$out4,$in4 + mtctr $rounds + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +___ +}} }}} + my $consts=1; foreach(split("\n",$code)) { s/\`([^\`]*)\`/eval($1)/geo; @@ -1898,7 +3757,7 @@ foreach(split("\n",$code)) { if ($flavour =~ /le$/o) { SWITCH: for($conv) { /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; - /\?rev/ && do { @bytes=reverse(@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; } } diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl index 9f4994cabcc7..b18e67d0e065 100644 --- a/drivers/crypto/vmx/ppc-xlate.pl +++ b/drivers/crypto/vmx/ppc-xlate.pl @@ -141,7 +141,7 @@ my $vmr = sub { # Some ABIs specify vrsave, special-purpose register #256, as reserved # for system use. 
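For context on the one-line ppc-xlate.pl fix below: the kernel build invokes this translator with flavour strings such as "linux-ppc64le" rather than OpenSSL's "linux64le", so the old /aix|linux64le/ test never matched and the generated code kept touching vrsave on an ABI that reserves it; the new pattern matches the flavour string the kernel actually passes.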
-my $no_vrsave = ($flavour =~ /aix|linux64le/); +my $no_vrsave = ($flavour =~ /linux-ppc64le/); my $mtspr = sub { my ($f,$idx,$ra) = @_; if ($idx == 256 && $no_vrsave) { diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c index e163d5770438..f688c32fbcc7 100644 --- a/drivers/crypto/vmx/vmx.c +++ b/drivers/crypto/vmx/vmx.c @@ -31,10 +31,12 @@ extern struct shash_alg p8_ghash_alg; extern struct crypto_alg p8_aes_alg; extern struct crypto_alg p8_aes_cbc_alg; extern struct crypto_alg p8_aes_ctr_alg; +extern struct crypto_alg p8_aes_xts_alg; static struct crypto_alg *algs[] = { &p8_aes_alg, &p8_aes_cbc_alg, &p8_aes_ctr_alg, + &p8_aes_xts_alg, NULL, }; diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 78dac0e9da11..a5be56ec57f2 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -75,7 +75,7 @@ config DEVFREQ_GOV_PASSIVE comment "DEVFREQ Drivers" config ARM_EXYNOS_BUS_DEVFREQ - bool "ARM EXYNOS Generic Memory Bus DEVFREQ Driver" + tristate "ARM EXYNOS Generic Memory Bus DEVFREQ Driver" depends on ARCH_EXYNOS select DEVFREQ_GOV_SIMPLE_ONDEMAND select DEVFREQ_GOV_PASSIVE diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c index 39b048eda2ce..9aea2c7ecbe6 100644 --- a/drivers/devfreq/devfreq-event.c +++ b/drivers/devfreq/devfreq-event.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -481,13 +481,3 @@ static int __init devfreq_event_init(void) return 0; } subsys_initcall(devfreq_event_init); - -static void __exit devfreq_event_exit(void) -{ - class_destroy(devfreq_event_class); -} -module_exit(devfreq_event_exit); - -MODULE_AUTHOR("Chanwoo Choi "); -MODULE_DESCRIPTION("DEVFREQ-Event class support"); -MODULE_LICENSE("GPL"); diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 1d6c803804d5..478006b7764a 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -268,8 +268,11 @@ int update_devfreq(struct devfreq *devfreq) devfreq_notify_transition(devfreq, &freqs, DEVFREQ_PRECHANGE); err = devfreq->profile->target(devfreq->dev.parent, &freq, flags); - if (err) + if (err) { + freqs.new = cur_freq; + devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE); return err; + } freqs.new = freq; devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE); @@ -552,6 +555,7 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->profile = profile; strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN); devfreq->previous_freq = profile->initial_freq; + devfreq->last_status.current_frequency = profile->initial_freq; devfreq->data = data; devfreq->nb.notifier_call = devfreq_notifier_call; @@ -561,23 +565,22 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_lock(&devfreq->lock); } - devfreq->trans_table = devm_kzalloc(dev, sizeof(unsigned int) * - devfreq->profile->max_state * - devfreq->profile->max_state, - GFP_KERNEL); - devfreq->time_in_state = devm_kzalloc(dev, sizeof(unsigned long) * - devfreq->profile->max_state, - GFP_KERNEL); - devfreq->last_stat_updated = jiffies; - dev_set_name(&devfreq->dev, "%s", dev_name(dev)); err = device_register(&devfreq->dev); if (err) { - put_device(&devfreq->dev); mutex_unlock(&devfreq->lock); goto err_out; } + devfreq->trans_table = devm_kzalloc(&devfreq->dev, sizeof(unsigned int) * + devfreq->profile->max_state * + devfreq->profile->max_state, + GFP_KERNEL); + devfreq->time_in_state = 
devm_kzalloc(&devfreq->dev, sizeof(unsigned long) * + devfreq->profile->max_state, + GFP_KERNEL); + devfreq->last_stat_updated = jiffies; + srcu_init_notifier_head(&devfreq->transition_notifier_list); mutex_unlock(&devfreq->lock); @@ -603,7 +606,6 @@ struct devfreq *devfreq_add_device(struct device *dev, err_init: list_del(&devfreq->node); device_unregister(&devfreq->dev); - kfree(devfreq); err_out: return ERR_PTR(err); } @@ -621,7 +623,6 @@ int devfreq_remove_device(struct devfreq *devfreq) return -EINVAL; device_unregister(&devfreq->dev); - put_device(&devfreq->dev); return 0; } @@ -706,10 +707,12 @@ struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index) if (devfreq->dev.parent && devfreq->dev.parent->of_node == node) { mutex_unlock(&devfreq_list_lock); + of_node_put(node); return devfreq; } } mutex_unlock(&devfreq_list_lock); + of_node_put(node); return ERR_PTR(-EPROBE_DEFER); } @@ -1198,13 +1201,6 @@ static int __init devfreq_init(void) } subsys_initcall(devfreq_init); -static void __exit devfreq_exit(void) -{ - class_destroy(devfreq_class); - destroy_workqueue(devfreq_wq); -} -module_exit(devfreq_exit); - /* * The followings are helper functions for devfreq user device drivers with * OPP framework. @@ -1470,7 +1466,3 @@ void devm_devfreq_unregister_notifier(struct device *dev, devm_devfreq_dev_match, devfreq)); } EXPORT_SYMBOL(devm_devfreq_unregister_notifier); - -MODULE_AUTHOR("MyungJoo Ham "); -MODULE_DESCRIPTION("devfreq class support"); -MODULE_LICENSE("GPL"); diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig index 1e8b4f469f38..eb6f74a2b6b9 100644 --- a/drivers/devfreq/event/Kconfig +++ b/drivers/devfreq/event/Kconfig @@ -14,7 +14,7 @@ menuconfig PM_DEVFREQ_EVENT if PM_DEVFREQ_EVENT config DEVFREQ_EVENT_EXYNOS_NOCP - bool "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver" + tristate "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver" depends on ARCH_EXYNOS select PM_OPP help @@ -22,7 +22,7 @@ config DEVFREQ_EVENT_EXYNOS_NOCP (Network on Chip) Probe counters to measure the bandwidth of AXI bus. 
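These Kconfig changes (together with ARM_EXYNOS_BUS_DEVFREQ above) turn built-in-only bool symbols into tristate ones, so the Exynos devfreq drivers can now also be built as loadable modules. A minimal sketch of the boilerplate a tristate-capable driver carries beyond built-in code, with illustrative names not taken from this patch:

#include <linux/module.h>
#include <linux/init.h>

/* A loadable module needs explicit entry/exit points plus license
 * metadata; built-in (bool) code can rely on an initcall alone and
 * never needs an exit path. */
static int __init example_driver_init(void)
{
	return 0;	/* driver registration would go here */
}

static void __exit example_driver_exit(void)
{
	/* driver unregistration on module removal */
}

module_init(example_driver_init);
module_exit(example_driver_exit);
MODULE_LICENSE("GPL");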
config DEVFREQ_EVENT_EXYNOS_PPMU - bool "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver" + tristate "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver" depends on ARCH_EXYNOS select PM_OPP help diff --git a/drivers/devfreq/event/exynos-nocp.c b/drivers/devfreq/event/exynos-nocp.c index 6b6a5f310486..a5841403bde8 100644 --- a/drivers/devfreq/event/exynos-nocp.c +++ b/drivers/devfreq/event/exynos-nocp.c @@ -220,9 +220,6 @@ static int exynos_nocp_parse_dt(struct platform_device *pdev, /* Maps the memory mapped IO to control nocp register */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (IS_ERR(res)) - return PTR_ERR(res); - base = devm_ioremap_resource(dev, res); if (IS_ERR(base)) return PTR_ERR(base); diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index f312485f1451..845bf25fb9fb 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -482,7 +482,8 @@ static int exynos_ppmu_probe(struct platform_device *pdev) if (!info->edev) { dev_err(&pdev->dev, "failed to allocate memory devfreq-event devices\n"); - return -ENOMEM; + ret = -ENOMEM; + goto err; } edev = info->edev; platform_set_drvdata(pdev, info); diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index 2363d0a189b7..29866f7e6d7e 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -383,7 +383,7 @@ err_clk: static int exynos_bus_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; + struct device_node *np = dev->of_node, *node; struct devfreq_dev_profile *profile; struct devfreq_simple_ondemand_data *ondemand_data; struct devfreq_passive_data *passive_data; @@ -407,7 +407,7 @@ static int exynos_bus_probe(struct platform_device *pdev) /* Parse the device-tree to get the resource information */ ret = exynos_bus_parse_of(np, bus); if (ret < 0) - goto err; + return ret; profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL); if (!profile) { @@ -415,10 +415,13 @@ static int exynos_bus_probe(struct platform_device *pdev) goto err; } - if (of_parse_phandle(dev->of_node, "devfreq", 0)) + node = of_parse_phandle(dev->of_node, "devfreq", 0); + if (node) { + of_node_put(node); goto passive; - else + } else { ret = exynos_bus_parent_parse_of(np, bus); + } if (ret < 0) goto err; diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index f8c5cd53307c..c5f21efd6090 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -126,28 +126,33 @@ static void hsu_dma_start_transfer(struct hsu_dma_chan *hsuc) hsu_dma_start_channel(hsuc); } -static u32 hsu_dma_chan_get_sr(struct hsu_dma_chan *hsuc) -{ - unsigned long flags; - u32 sr; - - spin_lock_irqsave(&hsuc->vchan.lock, flags); - sr = hsu_chan_readl(hsuc, HSU_CH_SR); - spin_unlock_irqrestore(&hsuc->vchan.lock, flags); - - return sr & ~(HSU_CH_SR_DESCE_ANY | HSU_CH_SR_CDESC_ANY); -} - -irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr) +/* + * hsu_dma_get_status() - get DMA channel status + * @chip: HSUART DMA chip + * @nr: DMA channel number + * @status: pointer for DMA Channel Status Register value + * + * Description: + * The function reads and clears the DMA Channel Status Register, checks + * if it was a timeout interrupt and returns a corresponding value. + * + * Caller should provide a valid pointer for the DMA Channel Status + * Register value that will be returned in @status. 
+ * + * Return: + * 1 for DMA timeout status, 0 for other DMA status, or error code for + * invalid parameters or no interrupt pending. + */ +int hsu_dma_get_status(struct hsu_dma_chip *chip, unsigned short nr, + u32 *status) { struct hsu_dma_chan *hsuc; - struct hsu_dma_desc *desc; unsigned long flags; u32 sr; /* Sanity check */ if (nr >= chip->hsu->nr_channels) - return IRQ_NONE; + return -EINVAL; hsuc = &chip->hsu->chan[nr]; @@ -155,22 +160,65 @@ irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr) * No matter what situation, need read clear the IRQ status * There is a bug, see Errata 5, HSD 2900918 */ - sr = hsu_dma_chan_get_sr(hsuc); + spin_lock_irqsave(&hsuc->vchan.lock, flags); + sr = hsu_chan_readl(hsuc, HSU_CH_SR); + spin_unlock_irqrestore(&hsuc->vchan.lock, flags); + + /* Check if any interrupt is pending */ + sr &= ~(HSU_CH_SR_DESCE_ANY | HSU_CH_SR_CDESC_ANY); if (!sr) - return IRQ_NONE; + return -EIO; /* Timeout IRQ, need wait some time, see Errata 2 */ if (sr & HSU_CH_SR_DESCTO_ANY) udelay(2); + /* + * At this point, at least one of Descriptor Time Out, Channel Error + * or Descriptor Done bits must be set. Clear the Descriptor Time Out + * bits and if sr is still non-zero, it must be channel error or + * descriptor done which are higher priority than timeout and handled + * in hsu_dma_do_irq(). Else, it must be a timeout. + */ sr &= ~HSU_CH_SR_DESCTO_ANY; - if (!sr) - return IRQ_HANDLED; + + *status = sr; + + return sr ? 0 : 1; +} +EXPORT_SYMBOL_GPL(hsu_dma_get_status); + +/* + * hsu_dma_do_irq() - DMA interrupt handler + * @chip: HSUART DMA chip + * @nr: DMA channel number + * @status: Channel Status Register value + * + * Description: + * This function handles Channel Error and Descriptor Done interrupts. + * This function should be called after determining that the DMA interrupt + * is not a normal timeout interrupt, ie. hsu_dma_get_status() returned 0. + * + * Return: + * IRQ_NONE for invalid channel number, IRQ_HANDLED otherwise. 
+ */ +irqreturn_t hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, + u32 status) +{ + struct hsu_dma_chan *hsuc; + struct hsu_dma_desc *desc; + unsigned long flags; + + /* Sanity check */ + if (nr >= chip->hsu->nr_channels) + return IRQ_NONE; + + hsuc = &chip->hsu->chan[nr]; spin_lock_irqsave(&hsuc->vchan.lock, flags); desc = hsuc->desc; if (desc) { - if (sr & HSU_CH_SR_CHE) { + if (status & HSU_CH_SR_CHE) { desc->status = DMA_ERROR; } else if (desc->active < desc->nents) { hsu_dma_start_channel(hsuc); @@ -184,7 +232,7 @@ irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr) return IRQ_HANDLED; } -EXPORT_SYMBOL_GPL(hsu_dma_irq); +EXPORT_SYMBOL_GPL(hsu_dma_do_irq); static struct hsu_dma_desc *hsu_dma_alloc_desc(unsigned int nents) { diff --git a/drivers/dma/hsu/pci.c b/drivers/dma/hsu/pci.c index e2db76bd56d8..9916058531d9 100644 --- a/drivers/dma/hsu/pci.c +++ b/drivers/dma/hsu/pci.c @@ -27,13 +27,20 @@ static irqreturn_t hsu_pci_irq(int irq, void *dev) { struct hsu_dma_chip *chip = dev; u32 dmaisr; + u32 status; unsigned short i; irqreturn_t ret = IRQ_NONE; + int err; dmaisr = readl(chip->regs + HSU_PCI_DMAISR); for (i = 0; i < chip->hsu->nr_channels; i++) { - if (dmaisr & 0x1) - ret |= hsu_dma_irq(chip, i); + if (dmaisr & 0x1) { + err = hsu_dma_get_status(chip, i, &status); + if (err > 0) + ret |= IRQ_HANDLED; + else if (err == 0) + ret |= hsu_dma_do_irq(chip, i, status); + } dmaisr >>= 1; } diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 6ca7474baf4a..d0c1dab9b435 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -391,6 +391,13 @@ config EDAC_ALTERA_OCRAM Support for error detection and correction on the Altera On-Chip RAM Memory for Altera SoCs. +config EDAC_ALTERA_ETHERNET + bool "Altera Ethernet FIFO ECC" + depends on EDAC_ALTERA=y + help + Support for error detection and correction on the + Altera Ethernet FIFO Memory for Altera SoCs. + config EDAC_SYNOPSYS tristate "Synopsys DDR Memory Controller" depends on EDAC_MM_EDAC && ARCH_ZYNQ diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 5b4d223d6d68..2398d0701f5b 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -19,12 +19,15 @@ #include #include +#include #include #include #include +#include #include #include #include +#include #include #include #include @@ -548,10 +551,10 @@ module_platform_driver(altr_edac_driver); * trigger testing are different for each memory. 
*/ -const struct edac_device_prv_data ocramecc_data; -const struct edac_device_prv_data l2ecc_data; -const struct edac_device_prv_data a10_ocramecc_data; -const struct edac_device_prv_data a10_l2ecc_data; +static const struct edac_device_prv_data ocramecc_data; +static const struct edac_device_prv_data l2ecc_data; +static const struct edac_device_prv_data a10_ocramecc_data; +static const struct edac_device_prv_data a10_l2ecc_data; static irqreturn_t altr_edac_device_handler(int irq, void *dev_id) { @@ -686,11 +689,9 @@ static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, static const struct of_device_id altr_edac_device_of_match[] = { #ifdef CONFIG_EDAC_ALTERA_L2C { .compatible = "altr,socfpga-l2-ecc", .data = &l2ecc_data }, - { .compatible = "altr,socfpga-a10-l2-ecc", .data = &a10_l2ecc_data }, #endif #ifdef CONFIG_EDAC_ALTERA_OCRAM { .compatible = "altr,socfpga-ocram-ecc", .data = &ocramecc_data }, - { .compatible = "altr,socfpga-a10-ocram-ecc", .data = &a10_ocramecc_data }, #endif {}, }; @@ -825,16 +826,16 @@ static struct platform_driver altr_edac_device_driver = { }; module_platform_driver(altr_edac_device_driver); -/*********************** OCRAM EDAC Device Functions *********************/ +/******************* Arria10 Device ECC Shared Functions *****************/ -#ifdef CONFIG_EDAC_ALTERA_OCRAM /* * Test for memory's ECC dependencies upon entry because platform specific * startup should have initialized the memory and enabled the ECC. * Can't turn on ECC here because accessing un-initialized memory will * cause CE/UE errors possibly causing an ABORT. */ -static int altr_check_ecc_deps(struct altr_edac_device_dev *device) +static int __maybe_unused +altr_check_ecc_deps(struct altr_edac_device_dev *device) { void __iomem *base = device->base; const struct edac_device_prv_data *prv = device->data; @@ -848,6 +849,227 @@ static int altr_check_ecc_deps(struct altr_edac_device_dev *device) return -ENODEV; } +static irqreturn_t __maybe_unused altr_edac_a10_ecc_irq(int irq, void *dev_id) +{ + struct altr_edac_device_dev *dci = dev_id; + void __iomem *base = dci->base; + + if (irq == dci->sb_irq) { + writel(ALTR_A10_ECC_SERRPENA, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name); + + return IRQ_HANDLED; + } else if (irq == dci->db_irq) { + writel(ALTR_A10_ECC_DERRPENA, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name); + if (dci->data->panic) + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); + + return IRQ_HANDLED; + } + + WARN_ON(1); + + return IRQ_NONE; +} + +/******************* Arria10 Memory Buffer Functions *********************/ + +static inline int a10_get_irq_mask(struct device_node *np) +{ + int irq; + const u32 *handle = of_get_property(np, "interrupts", NULL); + + if (!handle) + return -ENODEV; + irq = be32_to_cpup(handle); + return irq; +} + +static inline void ecc_set_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + value |= bit_mask; + writel(value, ioaddr); +} + +static inline void ecc_clear_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + value &= ~bit_mask; + writel(value, ioaddr); +} + +static inline int ecc_test_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + return (value & bit_mask) ? 1 : 0; +} + +/* + * This function uses the memory initialization block in the Arria10 ECC + * controller to initialize/clear the entire memory data and ECC data. 
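+ * The port argument selects between the PORTA and PORTB control and
+ * status bits of dual-port memories; the init loop polls the matching
+ * INITCOMPLETE bit under a 10us watchdog and clears any ECC interrupt
+ * status left pending by the initialization.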
+ */ +static int __maybe_unused altr_init_memory_port(void __iomem *ioaddr, int port) +{ + int limit = ALTR_A10_ECC_INIT_WATCHDOG_10US; + u32 init_mask, stat_mask, clear_mask; + int ret = 0; + + if (port) { + init_mask = ALTR_A10_ECC_INITB; + stat_mask = ALTR_A10_ECC_INITCOMPLETEB; + clear_mask = ALTR_A10_ECC_ERRPENB_MASK; + } else { + init_mask = ALTR_A10_ECC_INITA; + stat_mask = ALTR_A10_ECC_INITCOMPLETEA; + clear_mask = ALTR_A10_ECC_ERRPENA_MASK; + } + + ecc_set_bits(init_mask, (ioaddr + ALTR_A10_ECC_CTRL_OFST)); + while (limit--) { + if (ecc_test_bits(stat_mask, + (ioaddr + ALTR_A10_ECC_INITSTAT_OFST))) + break; + udelay(1); + } + if (limit < 0) + ret = -EBUSY; + + /* Clear any pending ECC interrupts */ + writel(clear_mask, (ioaddr + ALTR_A10_ECC_INTSTAT_OFST)); + + return ret; +} + +static __init int __maybe_unused +altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, + u32 ecc_ctrl_en_mask, bool dual_port) +{ + int ret = 0; + void __iomem *ecc_block_base; + struct regmap *ecc_mgr_map; + char *ecc_name; + struct device_node *np_eccmgr; + + ecc_name = (char *)np->name; + + /* Get the ECC Manager - parent of the device EDACs */ + np_eccmgr = of_get_parent(np); + ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr, + "altr,sysmgr-syscon"); + of_node_put(np_eccmgr); + if (IS_ERR(ecc_mgr_map)) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to get syscon altr,sysmgr-syscon\n"); + return -ENODEV; + } + + /* Map the ECC Block */ + ecc_block_base = of_iomap(np, 0); + if (!ecc_block_base) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to map %s ECC block\n", ecc_name); + return -ENODEV; + } + + /* Disable ECC */ + regmap_write(ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST, irq_mask); + writel(ALTR_A10_ECC_SERRINTEN, + (ecc_block_base + ALTR_A10_ECC_ERRINTENR_OFST)); + ecc_clear_bits(ecc_ctrl_en_mask, + (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); + /* Ensure all writes complete */ + wmb(); + /* Use HW initialization block to initialize memory for ECC */ + ret = altr_init_memory_port(ecc_block_base, 0); + if (ret) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "ECC: cannot init %s PORTA memory\n", ecc_name); + goto out; + } + + if (dual_port) { + ret = altr_init_memory_port(ecc_block_base, 1); + if (ret) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "ECC: cannot init %s PORTB memory\n", + ecc_name); + goto out; + } + } + + /* Interrupt mode set to every SBERR */ + regmap_write(ecc_mgr_map, ALTR_A10_ECC_INTMODE_OFST, + ALTR_A10_ECC_INTMODE); + /* Enable ECC */ + ecc_set_bits(ecc_ctrl_en_mask, (ecc_block_base + + ALTR_A10_ECC_CTRL_OFST)); + writel(ALTR_A10_ECC_SERRINTEN, + (ecc_block_base + ALTR_A10_ECC_ERRINTENS_OFST)); + regmap_write(ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_CLR_OFST, irq_mask); + /* Ensure all writes complete */ + wmb(); +out: + iounmap(ecc_block_base); + return ret; +} + +static int validate_parent_available(struct device_node *np); +static const struct of_device_id altr_edac_a10_device_of_match[]; +static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) +{ + int irq; + struct device_node *child, *np = of_find_compatible_node(NULL, NULL, + "altr,socfpga-a10-ecc-manager"); + if (!np) { + edac_printk(KERN_ERR, EDAC_DEVICE, "ECC Manager not found\n"); + return -ENODEV; + } + + for_each_child_of_node(np, child) { + const struct of_device_id *pdev_id; + const struct edac_device_prv_data *prv; + + if (!of_device_is_available(child)) + continue; + if (!of_device_is_compatible(child, compat)) + continue; + + if (validate_parent_available(child)) + continue; + + irq 
= a10_get_irq_mask(child); + if (irq < 0) + continue; + + /* Get matching node and check for valid result */ + pdev_id = of_match_node(altr_edac_a10_device_of_match, child); + if (IS_ERR_OR_NULL(pdev_id)) + continue; + + /* Validate private data pointer before dereferencing */ + prv = pdev_id->data; + if (!prv) + continue; + + altr_init_a10_ecc_block(child, BIT(irq), + prv->ecc_enable_mask, 0); + } + + of_node_put(np); + return 0; +} + +/*********************** OCRAM EDAC Device Functions *********************/ + +#ifdef CONFIG_EDAC_ALTERA_OCRAM + static void *ocram_alloc_mem(size_t size, void **other) { struct device_node *np; @@ -882,25 +1104,7 @@ static void ocram_free_mem(void *p, size_t size, void *other) gen_pool_free((struct gen_pool *)other, (u32)p, size); } -static irqreturn_t altr_edac_a10_ecc_irq(struct altr_edac_device_dev *dci, - bool sberr) -{ - void __iomem *base = dci->base; - - if (sberr) { - writel(ALTR_A10_ECC_SERRPENA, - base + ALTR_A10_ECC_INTSTAT_OFST); - edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name); - } else { - writel(ALTR_A10_ECC_DERRPENA, - base + ALTR_A10_ECC_INTSTAT_OFST); - edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name); - panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); - } - return IRQ_HANDLED; -} - -const struct edac_device_prv_data ocramecc_data = { +static const struct edac_device_prv_data ocramecc_data = { .setup = altr_check_ecc_deps, .ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR), .ue_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_DERR), @@ -916,7 +1120,7 @@ const struct edac_device_prv_data ocramecc_data = { .inject_fops = &altr_edac_device_inject_fops, }; -const struct edac_device_prv_data a10_ocramecc_data = { +static const struct edac_device_prv_data a10_ocramecc_data = { .setup = altr_check_ecc_deps, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, @@ -929,6 +1133,12 @@ const struct edac_device_prv_data a10_ocramecc_data = { .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, .inject_fops = &altr_edac_a10_device_inject_fops, + /* + * OCRAM panic on uncorrectable error because sleep/resume + * functions and FPGA contents are stored in OCRAM. Prefer + * a kernel panic over executing/loading corrupted data. 
+ */ + .panic = true, }; #endif /* CONFIG_EDAC_ALTERA_OCRAM */ @@ -988,25 +1198,33 @@ static int altr_l2_check_deps(struct altr_edac_device_dev *device) return -ENODEV; } -static irqreturn_t altr_edac_a10_l2_irq(struct altr_edac_device_dev *dci, - bool sberr) +static irqreturn_t altr_edac_a10_l2_irq(int irq, void *dev_id) { - if (sberr) { + struct altr_edac_device_dev *dci = dev_id; + + if (irq == dci->sb_irq) { regmap_write(dci->edac->ecc_mgr_map, A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST, A10_SYSGMR_MPU_CLEAR_L2_ECC_SB); edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name); - } else { + + return IRQ_HANDLED; + } else if (irq == dci->db_irq) { regmap_write(dci->edac->ecc_mgr_map, A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST, A10_SYSGMR_MPU_CLEAR_L2_ECC_MB); edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name); panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); + + return IRQ_HANDLED; } - return IRQ_HANDLED; + + WARN_ON(1); + + return IRQ_NONE; } -const struct edac_device_prv_data l2ecc_data = { +static const struct edac_device_prv_data l2ecc_data = { .setup = altr_l2_check_deps, .ce_clear_mask = 0, .ue_clear_mask = 0, @@ -1021,7 +1239,7 @@ const struct edac_device_prv_data l2ecc_data = { .inject_fops = &altr_edac_device_inject_fops, }; -const struct edac_device_prv_data a10_l2ecc_data = { +static const struct edac_device_prv_data a10_l2ecc_data = { .setup = altr_l2_check_deps, .ce_clear_mask = ALTR_A10_L2_ECC_SERR_CLR, .ue_clear_mask = ALTR_A10_L2_ECC_MERR_CLR, @@ -1040,7 +1258,49 @@ const struct edac_device_prv_data a10_l2ecc_data = { #endif /* CONFIG_EDAC_ALTERA_L2C */ +/********************* Ethernet Device Functions ********************/ + +#ifdef CONFIG_EDAC_ALTERA_ETHERNET + +static const struct edac_device_prv_data a10_enetecc_data = { + .setup = altr_check_ecc_deps, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .dbgfs_name = "altr_trigger", + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +static int __init socfpga_init_ethernet_ecc(void) +{ + return altr_init_a10_ecc_device_type("altr,socfpga-eth-mac-ecc"); +} + +early_initcall(socfpga_init_ethernet_ecc); + +#endif /* CONFIG_EDAC_ALTERA_ETHERNET */ + /********************* Arria10 EDAC Device Functions *************************/ +static const struct of_device_id altr_edac_a10_device_of_match[] = { +#ifdef CONFIG_EDAC_ALTERA_L2C + { .compatible = "altr,socfpga-a10-l2-ecc", .data = &a10_l2ecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_OCRAM + { .compatible = "altr,socfpga-a10-ocram-ecc", + .data = &a10_ocramecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_ETHERNET + { .compatible = "altr,socfpga-eth-mac-ecc", + .data = &a10_enetecc_data }, +#endif + {}, +}; +MODULE_DEVICE_TABLE(of, altr_edac_a10_device_of_match); /* * The Arria10 EDAC Device Functions differ from the Cyclone5/Arria5 @@ -1075,28 +1335,42 @@ static ssize_t altr_edac_a10_device_trig(struct file *file, return count; } -static irqreturn_t altr_edac_a10_irq_handler(int irq, void *dev_id) +static void altr_edac_a10_irq_handler(struct irq_desc *desc) { - irqreturn_t rc = IRQ_NONE; - struct altr_arria10_edac *edac = dev_id; - struct altr_edac_device_dev *dci; - int irq_status; - bool sberr = (irq == edac->sb_irq) ? 1 : 0; - int sm_offset = sberr ? 
A10_SYSMGR_ECC_INTSTAT_SERR_OFST : - A10_SYSMGR_ECC_INTSTAT_DERR_OFST; + int dberr, bit, sm_offset, irq_status; + struct altr_arria10_edac *edac = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); + int irq = irq_desc_get_irq(desc); + + dberr = (irq == edac->db_irq) ? 1 : 0; + sm_offset = dberr ? A10_SYSMGR_ECC_INTSTAT_DERR_OFST : + A10_SYSMGR_ECC_INTSTAT_SERR_OFST; + + chained_irq_enter(chip, desc); regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status); - if ((irq != edac->sb_irq) && (irq != edac->db_irq)) { - WARN_ON(1); - } else { - list_for_each_entry(dci, &edac->a10_ecc_devices, next) { - if (irq_status & dci->data->irq_status_mask) - rc = dci->data->ecc_irq_handler(dci, sberr); - } + for_each_set_bit(bit, (unsigned long *)&irq_status, 32) { + irq = irq_linear_revmap(edac->domain, dberr * 32 + bit); + if (irq) + generic_handle_irq(irq); } - return rc; + chained_irq_exit(chip, desc); +} + +static int validate_parent_available(struct device_node *np) +{ + struct device_node *parent; + int ret = 0; + + /* Ensure parent device is enabled if parent node exists */ + parent = of_parse_phandle(np, "altr,ecc-parent", 0); + if (parent && !of_device_is_available(parent)) + ret = -ENODEV; + + of_node_put(parent); + return ret; } static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, @@ -1111,7 +1385,7 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, const struct edac_device_prv_data *prv; /* Get matching node and check for valid result */ const struct of_device_id *pdev_id = - of_match_node(altr_edac_device_of_match, np); + of_match_node(altr_edac_a10_device_of_match, np); if (IS_ERR_OR_NULL(pdev_id)) return -ENODEV; @@ -1120,6 +1394,9 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, if (IS_ERR_OR_NULL(prv)) return -ENODEV; + if (validate_parent_available(np)) + return -ENODEV; + if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL)) return -ENOMEM; @@ -1168,6 +1445,34 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, goto err_release_group1; } + altdev->sb_irq = irq_of_parse_and_map(np, 0); + if (!altdev->sb_irq) { + edac_printk(KERN_ERR, EDAC_DEVICE, "Error allocating SBIRQ\n"); + rc = -ENODEV; + goto err_release_group1; + } + rc = devm_request_irq(edac->dev, altdev->sb_irq, + prv->ecc_irq_handler, + IRQF_SHARED, ecc_name, altdev); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, "No SBERR IRQ resource\n"); + goto err_release_group1; + } + + altdev->db_irq = irq_of_parse_and_map(np, 1); + if (!altdev->db_irq) { + edac_printk(KERN_ERR, EDAC_DEVICE, "Error allocating DBIRQ\n"); + rc = -ENODEV; + goto err_release_group1; + } + rc = devm_request_irq(edac->dev, altdev->db_irq, + prv->ecc_irq_handler, + IRQF_SHARED, ecc_name, altdev); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n"); + goto err_release_group1; + } + rc = edac_device_add_device(dci); if (rc) { dev_err(edac->dev, "edac_device_add_device failed\n"); @@ -1186,7 +1491,6 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, err_release_group1: edac_device_free_ctl_info(dci); err_release_group: - edac_printk(KERN_ALERT, EDAC_DEVICE, "%s: %d\n", __func__, __LINE__); devres_release_group(edac->dev, NULL); edac_printk(KERN_ERR, EDAC_DEVICE, "%s:Error setting up EDAC device: %d\n", ecc_name, rc); @@ -1194,11 +1498,43 @@ err_release_group: return rc; } +static void a10_eccmgr_irq_mask(struct irq_data *d) +{ + struct altr_arria10_edac *edac = irq_data_get_irq_chip_data(d); +
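+ /* + * The ECC manager exposes a set/clear register pair for the interrupt + * mask: masking a line is a single write of BIT(hwirq) to the set + * register, with no read-modify-write of a shared mask word. + */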
+ regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST, + BIT(d->hwirq)); +} + +static void a10_eccmgr_irq_unmask(struct irq_data *d) +{ + struct altr_arria10_edac *edac = irq_data_get_irq_chip_data(d); + + regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_CLR_OFST, + BIT(d->hwirq)); +} + +static int a10_eccmgr_irqdomain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + struct altr_arria10_edac *edac = d->host_data; + + irq_set_chip_and_handler(irq, &edac->irq_chip, handle_simple_irq); + irq_set_chip_data(irq, edac); + irq_set_noprobe(irq); + + return 0; +} + +struct irq_domain_ops a10_eccmgr_ic_ops = { + .map = a10_eccmgr_irqdomain_map, + .xlate = irq_domain_xlate_twocell, +}; + static int altr_edac_a10_probe(struct platform_device *pdev) { struct altr_arria10_edac *edac; struct device_node *child; - int rc; edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL); if (!edac) @@ -1216,32 +1552,50 @@ static int altr_edac_a10_probe(struct platform_device *pdev) return PTR_ERR(edac->ecc_mgr_map); } + edac->irq_chip.name = pdev->dev.of_node->name; + edac->irq_chip.irq_mask = a10_eccmgr_irq_mask; + edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask; + edac->domain = irq_domain_add_linear(pdev->dev.of_node, 64, + &a10_eccmgr_ic_ops, edac); + if (!edac->domain) { + dev_err(&pdev->dev, "Error adding IRQ domain\n"); + return -ENOMEM; + } + edac->sb_irq = platform_get_irq(pdev, 0); - rc = devm_request_irq(&pdev->dev, edac->sb_irq, - altr_edac_a10_irq_handler, - IRQF_SHARED, dev_name(&pdev->dev), edac); - if (rc) { - edac_printk(KERN_ERR, EDAC_DEVICE, "No SBERR IRQ resource\n"); - return rc; + if (edac->sb_irq < 0) { + dev_err(&pdev->dev, "No SBERR IRQ resource\n"); + return edac->sb_irq; } + irq_set_chained_handler_and_data(edac->sb_irq, + altr_edac_a10_irq_handler, + edac); + edac->db_irq = platform_get_irq(pdev, 1); - rc = devm_request_irq(&pdev->dev, edac->db_irq, - altr_edac_a10_irq_handler, - IRQF_SHARED, dev_name(&pdev->dev), edac); - if (rc) { - edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n"); - return rc; + if (edac->db_irq < 0) { + dev_err(&pdev->dev, "No DBERR IRQ resource\n"); + return edac->db_irq; } + irq_set_chained_handler_and_data(edac->db_irq, + altr_edac_a10_irq_handler, + edac); for_each_child_of_node(pdev->dev.of_node, child) { if (!of_device_is_available(child)) continue; if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc")) altr_edac_a10_device_add(edac, child); - else if (of_device_is_compatible(child, - "altr,socfpga-a10-ocram-ecc")) + else if ((of_device_is_compatible(child, + "altr,socfpga-a10-ocram-ecc")) || + (of_device_is_compatible(child, + "altr,socfpga-eth-mac-ecc"))) altr_edac_a10_device_add(edac, child); + else if (of_device_is_compatible(child, + "altr,sdram-edac-a10")) + of_platform_populate(pdev->dev.of_node, + altr_sdram_ctrl_of_match, + NULL, &pdev->dev); } return 0; diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 42090f36ba6e..687d8e754d36 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -230,8 +230,13 @@ struct altr_sdram_mc_data { #define ALTR_A10_ECC_INITCOMPLETEB BIT(8) #define ALTR_A10_ECC_ERRINTEN_OFST 0x10 +#define ALTR_A10_ECC_ERRINTENS_OFST 0x14 +#define ALTR_A10_ECC_ERRINTENR_OFST 0x18 #define ALTR_A10_ECC_SERRINTEN BIT(0) +#define ALTR_A10_ECC_INTMODE_OFST 0x1C +#define ALTR_A10_ECC_INTMODE BIT(0) + #define ALTR_A10_ECC_INTSTAT_OFST 0x20 #define ALTR_A10_ECC_SERRPENA BIT(0) #define ALTR_A10_ECC_DERRPENA BIT(8) @@ -280,6 +285,12 @@ 
struct altr_sdram_mc_data { /* Arria 10 OCRAM ECC Management Group Defines */ #define ALTR_A10_OCRAM_ECC_EN_CTL (BIT(1) | BIT(0)) +/* Arria 10 Ethernet ECC Management Group Defines */ +#define ALTR_A10_COMMON_ECC_EN_CTL BIT(0) + +/* A10 ECC Controller memory initialization timeout */ +#define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000 + struct altr_edac_device_dev; struct edac_device_prv_data { @@ -295,10 +306,10 @@ struct edac_device_prv_data { int ce_set_mask; int ue_set_mask; int set_err_ofst; - irqreturn_t (*ecc_irq_handler)(struct altr_edac_device_dev *dci, - bool sb); + irqreturn_t (*ecc_irq_handler)(int irq, void *dev_id); int trig_alloc_sz; const struct file_operations *inject_fops; + bool panic; }; struct altr_edac_device_dev { @@ -320,6 +331,8 @@ struct altr_arria10_edac { struct regmap *ecc_mgr_map; int sb_irq; int db_irq; + struct irq_domain *domain; + struct irq_chip irq_chip; struct list_head a10_ecc_devices; }; diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 46784eb2edc6..8c0ec2128907 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2966,11 +2966,11 @@ static int __init amd64_edac_init(void) int err = -ENODEV; int i; - opstate_init(); - if (amd_cache_northbridges() < 0) goto err_ret; + opstate_init(); + err = -ENOMEM; ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL); if (!ecc_stngs) diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 10c305b4a2e1..4e0f8e720ad9 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -313,7 +313,6 @@ static struct device_type csrow_attr_type = { * possible dynamic channel DIMM Label attribute files * */ - DEVICE_CHANNEL(ch0_dimm_label, S_IRUGO | S_IWUSR, channel_dimm_label_show, channel_dimm_label_store, 0); DEVICE_CHANNEL(ch1_dimm_label, S_IRUGO | S_IWUSR, @@ -326,6 +325,10 @@ DEVICE_CHANNEL(ch4_dimm_label, S_IRUGO | S_IWUSR, channel_dimm_label_show, channel_dimm_label_store, 4); DEVICE_CHANNEL(ch5_dimm_label, S_IRUGO | S_IWUSR, channel_dimm_label_show, channel_dimm_label_store, 5); +DEVICE_CHANNEL(ch6_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 6); +DEVICE_CHANNEL(ch7_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 7); /* Total possible dynamic DIMM Label attribute file table */ static struct attribute *dynamic_csrow_dimm_attr[] = { @@ -335,6 +338,8 @@ static struct attribute *dynamic_csrow_dimm_attr[] = { &dev_attr_legacy_ch3_dimm_label.attr.attr, &dev_attr_legacy_ch4_dimm_label.attr.attr, &dev_attr_legacy_ch5_dimm_label.attr.attr, + &dev_attr_legacy_ch6_dimm_label.attr.attr, + &dev_attr_legacy_ch7_dimm_label.attr.attr, NULL }; @@ -351,6 +356,10 @@ DEVICE_CHANNEL(ch4_ce_count, S_IRUGO, channel_ce_count_show, NULL, 4); DEVICE_CHANNEL(ch5_ce_count, S_IRUGO, channel_ce_count_show, NULL, 5); +DEVICE_CHANNEL(ch6_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 6); +DEVICE_CHANNEL(ch7_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 7); /* Total possible dynamic ce_count attribute file table */ static struct attribute *dynamic_csrow_ce_count_attr[] = { @@ -360,6 +369,8 @@ static struct attribute *dynamic_csrow_ce_count_attr[] = { &dev_attr_legacy_ch3_ce_count.attr.attr, &dev_attr_legacy_ch4_ce_count.attr.attr, &dev_attr_legacy_ch5_ce_count.attr.attr, + &dev_attr_legacy_ch6_ce_count.attr.attr, + &dev_attr_legacy_ch7_ce_count.attr.attr, NULL }; @@ -371,9 +382,16 @@ static umode_t csrow_dev_is_visible(struct kobject *kobj, if (idx >= csrow->nr_channels) 
return 0; + + if (idx >= ARRAY_SIZE(dynamic_csrow_ce_count_attr) - 1) { + WARN_ONCE(1, "idx: %d\n", idx); + return 0; + } + /* Only expose populated DIMMs */ if (!csrow->channels[idx]->dimm->nr_pages) return 0; + return attr->mode; } diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 6744d88bdea8..4fb2eb7c800d 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2378,22 +2378,19 @@ static int sbridge_get_onedevice(struct pci_dev **prev, * @num_mc: pointer to the memory controllers count, to be incremented in case * of success. * @table: model specific table - * @allow_dups: allow for multiple devices to exist with the same device id - * (as implemented, this isn't expected to work correctly in the - * multi-socket case). - * @multi_bus: don't assume devices on different buses belong to different - * memory controllers. * * returns 0 in case of success or error code */ -static int sbridge_get_all_devices_full(u8 *num_mc, - const struct pci_id_table *table, - int allow_dups, - int multi_bus) +static int sbridge_get_all_devices(u8 *num_mc, + const struct pci_id_table *table) { int i, rc; struct pci_dev *pdev = NULL; + int allow_dups = 0; + int multi_bus = 0; + if (table->type == KNIGHTS_LANDING) + allow_dups = multi_bus = 1; while (table && table->descr) { for (i = 0; i < table->n_devs; i++) { if (!allow_dups || i == 0 || @@ -2420,11 +2417,6 @@ static int sbridge_get_all_devices_full(u8 *num_mc, return 0; } -#define sbridge_get_all_devices(num_mc, table) \ - sbridge_get_all_devices_full(num_mc, table, 0, 0) -#define sbridge_get_all_devices_knl(num_mc, table) \ - sbridge_get_all_devices_full(num_mc, table, 1, 1) - static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, struct sbridge_dev *sbridge_dev) { diff --git a/drivers/extcon/Makefile b/drivers/extcon/Makefile index 2a0e4f45d5b2..972c813c375b 100644 --- a/drivers/extcon/Makefile +++ b/drivers/extcon/Makefile @@ -2,7 +2,8 @@ # Makefile for external connector class (extcon) devices # -obj-$(CONFIG_EXTCON) += extcon.o +obj-$(CONFIG_EXTCON) += extcon-core.o +extcon-core-objs += extcon.o devres.o obj-$(CONFIG_EXTCON_ADC_JACK) += extcon-adc-jack.o obj-$(CONFIG_EXTCON_ARIZONA) += extcon-arizona.o obj-$(CONFIG_EXTCON_AXP288) += extcon-axp288.o diff --git a/drivers/extcon/devres.c b/drivers/extcon/devres.c new file mode 100644 index 000000000000..e686acd1c459 --- /dev/null +++ b/drivers/extcon/devres.c @@ -0,0 +1,216 @@ +/* + * drivers/extcon/devres.c - EXTCON device's resource management + * + * Copyright (C) 2016 Samsung Electronics + * Author: Chanwoo Choi + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include + +static int devm_extcon_dev_match(struct device *dev, void *res, void *data) +{ + struct extcon_dev **r = res; + + if (WARN_ON(!r || !*r)) + return 0; + + return *r == data; +} + +static void devm_extcon_dev_release(struct device *dev, void *res) +{ + extcon_dev_free(*(struct extcon_dev **)res); +} + + +static void devm_extcon_dev_unreg(struct device *dev, void *res) +{ + extcon_dev_unregister(*(struct extcon_dev **)res); +} + +struct extcon_dev_notifier_devres { + struct extcon_dev *edev; + unsigned int id; + struct notifier_block *nb; +}; + +static void devm_extcon_dev_notifier_unreg(struct device *dev, void *res) +{ + struct extcon_dev_notifier_devres *this = res; + + extcon_unregister_notifier(this->edev, this->id, this->nb); +} + +/** + * devm_extcon_dev_allocate - Allocate managed extcon device + * @dev: device owning the extcon device being created + * @supported_cable: Array of supported extcon ids ending with EXTCON_NONE. + * If supported_cable is NULL, cable name related APIs + * are disabled. + * + * This function automatically manages the memory of the extcon device using + * device resource management and simplifies the control of freeing the memory + * of the extcon device. + * + * Returns a pointer to the allocated extcon_dev on success + * or ERR_PTR(err) on failure. + */ +struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, + const unsigned int *supported_cable) +{ + struct extcon_dev **ptr, *edev; + + ptr = devres_alloc(devm_extcon_dev_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + edev = extcon_dev_allocate(supported_cable); + if (IS_ERR(edev)) { + devres_free(ptr); + return edev; + } + + edev->dev.parent = dev; + + *ptr = edev; + devres_add(dev, ptr); + + return edev; +} +EXPORT_SYMBOL_GPL(devm_extcon_dev_allocate); + +/** + * devm_extcon_dev_free() - Resource-managed extcon_dev_free() + * @dev: device the extcon belongs to + * @edev: the extcon device to free + * + * Free the memory that was allocated with devm_extcon_dev_allocate(). + */ +void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev) +{ + WARN_ON(devres_release(dev, devm_extcon_dev_release, + devm_extcon_dev_match, edev)); +} +EXPORT_SYMBOL_GPL(devm_extcon_dev_free); + +/** + * devm_extcon_dev_register() - Resource-managed extcon_dev_register() + * @dev: device to allocate extcon device + * @edev: the new extcon device to register + * + * Managed extcon_dev_register() function. If extcon device is attached with + * this function, that extcon device is automatically unregistered on driver + * detach. Internally this function calls extcon_dev_register() function. + * To get more information, refer to that function. + * + * If extcon device is registered with this function and the device needs to be + * unregistered separately, devm_extcon_dev_unregister() should be used. + * + * Returns 0 on success or a negative error number on failure.
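+ * + * A typical probe() sequence (hypothetical driver code) is: + * + * edev = devm_extcon_dev_allocate(dev, cables); + * if (IS_ERR(edev)) + * return PTR_ERR(edev); + * ret = devm_extcon_dev_register(dev, edev); + * + * after which both the allocation and the registration are undone + * automatically on driver detach.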
+ */ +int devm_extcon_dev_register(struct device *dev, struct extcon_dev *edev) +{ + struct extcon_dev **ptr; + int ret; + + ptr = devres_alloc(devm_extcon_dev_unreg, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + ret = extcon_dev_register(edev); + if (ret) { + devres_free(ptr); + return ret; + } + + *ptr = edev; + devres_add(dev, ptr); + + return 0; +} +EXPORT_SYMBOL_GPL(devm_extcon_dev_register); + +/** + * devm_extcon_dev_unregister() - Resource-managed extcon_dev_unregister() + * @dev: device the extcon belongs to + * @edev: the extcon device to unregister + * + * Unregister the extcon device that was registered with + * devm_extcon_dev_register(). + */ +void devm_extcon_dev_unregister(struct device *dev, struct extcon_dev *edev) +{ + WARN_ON(devres_release(dev, devm_extcon_dev_unreg, + devm_extcon_dev_match, edev)); +} +EXPORT_SYMBOL_GPL(devm_extcon_dev_unregister); + +/** + * devm_extcon_register_notifier() - Resource-managed extcon_register_notifier() + * @dev: device to allocate extcon device + * @edev: the extcon device that has the external connector. + * @id: the unique id of each external connector in extcon enumeration. + * @nb: a notifier block to be registered. + * + * This function automatically manages the notifier of the extcon device using + * device resource management and simplifies the control of unregistering + * the notifier. + * + * Note that the second parameter given to the callback of nb (val) is + * "old_state", not the current state. The current state can be retrieved + * by looking at the third parameter (edev pointer)'s state value. + * + * Returns 0 on success or a negative error number on failure. + */ +int devm_extcon_register_notifier(struct device *dev, struct extcon_dev *edev, + unsigned int id, struct notifier_block *nb) +{ + struct extcon_dev_notifier_devres *ptr; + int ret; + + ptr = devres_alloc(devm_extcon_dev_notifier_unreg, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + ret = extcon_register_notifier(edev, id, nb); + if (ret) { + devres_free(ptr); + return ret; + } + + ptr->edev = edev; + ptr->id = id; + ptr->nb = nb; + devres_add(dev, ptr); + + return 0; +} +EXPORT_SYMBOL(devm_extcon_register_notifier); + +/** + * devm_extcon_unregister_notifier() - Resource-managed extcon_unregister_notifier() + * @dev: device to allocate extcon device + * @edev: the extcon device that has the external connector. + * @id: the unique id of each external connector in extcon enumeration. + * @nb: a notifier block to be unregistered. + */ +void devm_extcon_unregister_notifier(struct device *dev, + struct extcon_dev *edev, unsigned int id, + struct notifier_block *nb) +{ + WARN_ON(devres_release(dev, devm_extcon_dev_notifier_unreg, + devm_extcon_dev_match, edev)); +} +EXPORT_SYMBOL(devm_extcon_unregister_notifier); diff --git a/drivers/extcon/extcon-adc-jack.c b/drivers/extcon/extcon-adc-jack.c index 7fc0ae1912f8..44e48aa78a84 100644 --- a/drivers/extcon/extcon-adc-jack.c +++ b/drivers/extcon/extcon-adc-jack.c @@ -38,6 +38,7 @@ * @chan: iio channel being queried.
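+ * @dev: the platform device's struct device, saved for the wakeup checks. + * @wakeup_source: whether the jack may wake the system from suspend.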
*/ struct adc_jack_data { + struct device *dev; struct extcon_dev *edev; const unsigned int **cable_names; @@ -49,6 +50,7 @@ struct adc_jack_data { struct delayed_work handler; struct iio_channel *chan; + bool wakeup_source; }; static void adc_jack_handler(struct work_struct *work) @@ -105,6 +107,7 @@ static int adc_jack_probe(struct platform_device *pdev) return -EINVAL; } + data->dev = &pdev->dev; data->edev = devm_extcon_dev_allocate(&pdev->dev, pdata->cable_names); if (IS_ERR(data->edev)) { dev_err(&pdev->dev, "failed to allocate extcon device\n"); @@ -128,6 +131,7 @@ static int adc_jack_probe(struct platform_device *pdev) return PTR_ERR(data->chan); data->handling_delay = msecs_to_jiffies(pdata->handling_delay_ms); + data->wakeup_source = pdata->wakeup_source; INIT_DEFERRABLE_WORK(&data->handler, adc_jack_handler); @@ -151,6 +155,9 @@ static int adc_jack_probe(struct platform_device *pdev) return err; } + if (data->wakeup_source) + device_init_wakeup(&pdev->dev, 1); + return 0; } @@ -165,11 +172,38 @@ static int adc_jack_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +static int adc_jack_suspend(struct device *dev) +{ + struct adc_jack_data *data = dev_get_drvdata(dev); + + cancel_delayed_work_sync(&data->handler); + if (device_may_wakeup(data->dev)) + enable_irq_wake(data->irq); + + return 0; +} + +static int adc_jack_resume(struct device *dev) +{ + struct adc_jack_data *data = dev_get_drvdata(dev); + + if (device_may_wakeup(data->dev)) + disable_irq_wake(data->irq); + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(adc_jack_pm_ops, + adc_jack_suspend, adc_jack_resume); + static struct platform_driver adc_jack_driver = { .probe = adc_jack_probe, .remove = adc_jack_remove, .driver = { .name = "adc-jack", + .pm = &adc_jack_pm_ops, }, }; diff --git a/drivers/extcon/extcon-usb-gpio.c b/drivers/extcon/extcon-usb-gpio.c index 2b2fecffb1ad..2512660dc4b9 100644 --- a/drivers/extcon/extcon-usb-gpio.c +++ b/drivers/extcon/extcon-usb-gpio.c @@ -24,8 +24,10 @@ #include #include #include +#include #include #include +#include #define USB_GPIO_DEBOUNCE_MS 20 /* ms */ @@ -91,7 +93,7 @@ static int usb_extcon_probe(struct platform_device *pdev) struct usb_extcon_info *info; int ret; - if (!np) + if (!np && !ACPI_HANDLE(dev)) return -EINVAL; info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); @@ -141,7 +143,8 @@ static int usb_extcon_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, info); - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); + dev_pm_set_wake_irq(dev, info->id_irq); /* Perform initial detection */ usb_extcon_detect_cable(&info->wq_detcable.work); @@ -155,6 +158,9 @@ static int usb_extcon_remove(struct platform_device *pdev) cancel_delayed_work_sync(&info->wq_detcable); + dev_pm_clear_wake_irq(&pdev->dev); + device_init_wakeup(&pdev->dev, false); + return 0; } @@ -164,12 +170,6 @@ static int usb_extcon_suspend(struct device *dev) struct usb_extcon_info *info = dev_get_drvdata(dev); int ret = 0; - if (device_may_wakeup(dev)) { - ret = enable_irq_wake(info->id_irq); - if (ret) - return ret; - } - /* * We don't want to process any IRQs after this point * as GPIOs used behind I2C subsystem might not be @@ -185,13 +185,10 @@ static int usb_extcon_resume(struct device *dev) struct usb_extcon_info *info = dev_get_drvdata(dev); int ret = 0; - if (device_may_wakeup(dev)) { - ret = disable_irq_wake(info->id_irq); - if (ret) - return ret; - } - enable_irq(info->id_irq); + if (!device_may_wakeup(dev)) + 
queue_delayed_work(system_power_efficient_wq, + &info->wq_detcable, 0); return ret; } @@ -206,6 +203,12 @@ static const struct of_device_id usb_extcon_dt_match[] = { }; MODULE_DEVICE_TABLE(of, usb_extcon_dt_match); +static const struct platform_device_id usb_extcon_platform_ids[] = { + { .name = "extcon-usb-gpio", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(platform, usb_extcon_platform_ids); + static struct platform_driver usb_extcon_driver = { .probe = usb_extcon_probe, .remove = usb_extcon_remove, @@ -214,6 +217,7 @@ static struct platform_driver usb_extcon_driver = { .pm = &usb_extcon_pm_ops, .of_match_table = usb_extcon_dt_match, }, + .id_table = usb_extcon_platform_ids, }; module_platform_driver(usb_extcon_driver); diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index 21a123cadf78..8682efc0f57b 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -77,6 +77,26 @@ static const char *extcon_name[] = { NULL, }; +/** + * struct extcon_cable - An internal data for each cable of extcon device. + * @edev: The extcon device + * @cable_index: Index of this cable in the edev + * @attr_g: Attribute group for the cable + * @attr_name: "name" sysfs entry + * @attr_state: "state" sysfs entry + * @attrs: Array pointing to attr_name and attr_state for attr_g + */ +struct extcon_cable { + struct extcon_dev *edev; + int cable_index; + + struct attribute_group attr_g; + struct device_attribute attr_name; + struct device_attribute attr_state; + + struct attribute *attrs[3]; /* to be fed to attr_g.attrs */ +}; + static struct class *extcon_class; #if defined(CONFIG_ANDROID) static struct class_compat *switch_class; @@ -127,38 +147,6 @@ static int find_cable_index_by_id(struct extcon_dev *edev, const unsigned int id return -EINVAL; } -static int find_cable_id_by_name(struct extcon_dev *edev, const char *name) -{ - int id = -EINVAL; - int i = 0; - - /* Find the id of extcon cable */ - while (extcon_name[i]) { - if (!strncmp(extcon_name[i], name, CABLE_NAME_MAX)) { - id = i; - break; - } - i++; - } - - return id; -} - -static int find_cable_index_by_name(struct extcon_dev *edev, const char *name) -{ - int id; - - if (edev->max_supported == 0) - return -EINVAL; - - /* Find the the number of extcon cable */ - id = find_cable_id_by_name(edev, name); - if (id < 0) - return id; - - return find_cable_index_by_id(edev, id); -} - static bool is_extcon_changed(u32 prev, u32 new, int idx, bool *attached) { if (((prev >> idx) & 0x1) != ((new >> idx) & 0x1)) { @@ -373,25 +361,6 @@ int extcon_get_cable_state_(struct extcon_dev *edev, const unsigned int id) } EXPORT_SYMBOL_GPL(extcon_get_cable_state_); -/** - * extcon_get_cable_state() - Get the status of a specific cable. - * @edev: the extcon device that has the cable. - * @cable_name: cable name. - * - * Note that this is slower than extcon_get_cable_state_. - */ -int extcon_get_cable_state(struct extcon_dev *edev, const char *cable_name) -{ - int id; - - id = find_cable_id_by_name(edev, cable_name); - if (id < 0) - return id; - - return extcon_get_cable_state_(edev, id); -} -EXPORT_SYMBOL_GPL(extcon_get_cable_state); - /** * extcon_set_cable_state_() - Set the status of a specific cable. * @edev: the extcon device that has the cable. @@ -421,28 +390,6 @@ int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id, } EXPORT_SYMBOL_GPL(extcon_set_cable_state_); -/** - * extcon_set_cable_state() - Set the status of a specific cable. - * @edev: the extcon device that has the cable. - * @cable_name: cable name. 
- * @cable_state: the new cable status. The default semantics is - * true: attached / false: detached. - * - * Note that this is slower than extcon_set_cable_state_. - */ -int extcon_set_cable_state(struct extcon_dev *edev, - const char *cable_name, bool cable_state) -{ - int id; - - id = find_cable_id_by_name(edev, cable_name); - if (id < 0) - return id; - - return extcon_set_cable_state_(edev, id, cable_state); -} -EXPORT_SYMBOL_GPL(extcon_set_cable_state); - /** * extcon_get_extcon_dev() - Get the extcon device instance from the name * @extcon_name: The extcon name provided with extcon_dev_register() @@ -466,105 +413,6 @@ out: } EXPORT_SYMBOL_GPL(extcon_get_extcon_dev); -/** - * extcon_register_interest() - Register a notifier for a state change of a - * specific cable, not an entier set of cables of a - * extcon device. - * @obj: an empty extcon_specific_cable_nb object to be returned. - * @extcon_name: the name of extcon device. - * if NULL, extcon_register_interest will register - * every cable with the target cable_name given. - * @cable_name: the target cable name. - * @nb: the notifier block to get notified. - * - * Provide an empty extcon_specific_cable_nb. extcon_register_interest() sets - * the struct for you. - * - * extcon_register_interest is a helper function for those who want to get - * notification for a single specific cable's status change. If a user wants - * to get notification for any changes of all cables of a extcon device, - * he/she should use the general extcon_register_notifier(). - * - * Note that the second parameter given to the callback of nb (val) is - * "old_state", not the current state. The current state can be retrieved - * by looking at the third pameter (edev pointer)'s state value. - */ -int extcon_register_interest(struct extcon_specific_cable_nb *obj, - const char *extcon_name, const char *cable_name, - struct notifier_block *nb) -{ - unsigned long flags; - int ret; - - if (!obj || !cable_name || !nb) - return -EINVAL; - - if (extcon_name) { - obj->edev = extcon_get_extcon_dev(extcon_name); - if (!obj->edev) - return -ENODEV; - - obj->cable_index = find_cable_index_by_name(obj->edev, - cable_name); - if (obj->cable_index < 0) - return obj->cable_index; - - obj->user_nb = nb; - - spin_lock_irqsave(&obj->edev->lock, flags); - ret = raw_notifier_chain_register( - &obj->edev->nh[obj->cable_index], - obj->user_nb); - spin_unlock_irqrestore(&obj->edev->lock, flags); - } else { - struct class_dev_iter iter; - struct extcon_dev *extd; - struct device *dev; - - if (!extcon_class) - return -ENODEV; - class_dev_iter_init(&iter, extcon_class, NULL, NULL); - while ((dev = class_dev_iter_next(&iter))) { - extd = dev_get_drvdata(dev); - - if (find_cable_index_by_name(extd, cable_name) < 0) - continue; - - class_dev_iter_exit(&iter); - return extcon_register_interest(obj, extd->name, - cable_name, nb); - } - - ret = -ENODEV; - } - - return ret; -} -EXPORT_SYMBOL_GPL(extcon_register_interest); - -/** - * extcon_unregister_interest() - Unregister the notifier registered by - * extcon_register_interest(). - * @obj: the extcon_specific_cable_nb object returned by - * extcon_register_interest(). 
- */ -int extcon_unregister_interest(struct extcon_specific_cable_nb *obj) -{ - unsigned long flags; - int ret; - - if (!obj) - return -EINVAL; - - spin_lock_irqsave(&obj->edev->lock, flags); - ret = raw_notifier_chain_unregister( - &obj->edev->nh[obj->cable_index], obj->user_nb); - spin_unlock_irqrestore(&obj->edev->lock, flags); - - return ret; -} -EXPORT_SYMBOL_GPL(extcon_unregister_interest); - /** * extcon_register_notifier() - Register a notifiee to get notified by * any attach status changes from the extcon. @@ -582,14 +430,35 @@ int extcon_register_notifier(struct extcon_dev *edev, unsigned int id, unsigned long flags; int ret, idx; - if (!edev || !nb) + if (!nb) return -EINVAL; - idx = find_cable_index_by_id(edev, id); + if (edev) { + idx = find_cable_index_by_id(edev, id); + if (idx < 0) + return idx; - spin_lock_irqsave(&edev->lock, flags); - ret = raw_notifier_chain_register(&edev->nh[idx], nb); - spin_unlock_irqrestore(&edev->lock, flags); + spin_lock_irqsave(&edev->lock, flags); + ret = raw_notifier_chain_register(&edev->nh[idx], nb); + spin_unlock_irqrestore(&edev->lock, flags); + } else { + struct extcon_dev *extd; + + mutex_lock(&extcon_dev_list_lock); + list_for_each_entry(extd, &extcon_dev_list, entry) { + idx = find_cable_index_by_id(extd, id); + if (idx >= 0) + break; + } + mutex_unlock(&extcon_dev_list_lock); + + if (idx >= 0) { + edev = extd; + return extcon_register_notifier(extd, id, nb); + } else { + ret = -ENODEV; + } + } return ret; } @@ -611,6 +480,8 @@ int extcon_unregister_notifier(struct extcon_dev *edev, unsigned int id, return -EINVAL; idx = find_cable_index_by_id(edev, id); + if (idx < 0) + return idx; spin_lock_irqsave(&edev->lock, flags); ret = raw_notifier_chain_unregister(&edev->nh[idx], nb); @@ -693,66 +564,6 @@ void extcon_dev_free(struct extcon_dev *edev) } EXPORT_SYMBOL_GPL(extcon_dev_free); -static int devm_extcon_dev_match(struct device *dev, void *res, void *data) -{ - struct extcon_dev **r = res; - - if (WARN_ON(!r || !*r)) - return 0; - - return *r == data; -} - -static void devm_extcon_dev_release(struct device *dev, void *res) -{ - extcon_dev_free(*(struct extcon_dev **)res); -} - -/** - * devm_extcon_dev_allocate - Allocate managed extcon device - * @dev: device owning the extcon device being created - * @supported_cable: Array of supported extcon ending with EXTCON_NONE. - * If supported_cable is NULL, cable name related APIs - * are disabled. - * - * This function manages automatically the memory of extcon device using device - * resource management and simplify the control of freeing the memory of extcon - * device. 
- * - * Returns the pointer memory of allocated extcon_dev if success - * or ERR_PTR(err) if fail - */ -struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, - const unsigned int *supported_cable) -{ - struct extcon_dev **ptr, *edev; - - ptr = devres_alloc(devm_extcon_dev_release, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return ERR_PTR(-ENOMEM); - - edev = extcon_dev_allocate(supported_cable); - if (IS_ERR(edev)) { - devres_free(ptr); - return edev; - } - - edev->dev.parent = dev; - - *ptr = edev; - devres_add(dev, ptr); - - return edev; -} -EXPORT_SYMBOL_GPL(devm_extcon_dev_allocate); - -void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev) -{ - WARN_ON(devres_release(dev, devm_extcon_dev_release, - devm_extcon_dev_match, edev)); -} -EXPORT_SYMBOL_GPL(devm_extcon_dev_free); - /** * extcon_dev_register() - Register a new extcon device * @edev : the new extcon device (should be allocated before calling) @@ -1018,63 +829,6 @@ void extcon_dev_unregister(struct extcon_dev *edev) } EXPORT_SYMBOL_GPL(extcon_dev_unregister); -static void devm_extcon_dev_unreg(struct device *dev, void *res) -{ - extcon_dev_unregister(*(struct extcon_dev **)res); -} - -/** - * devm_extcon_dev_register() - Resource-managed extcon_dev_register() - * @dev: device to allocate extcon device - * @edev: the new extcon device to register - * - * Managed extcon_dev_register() function. If extcon device is attached with - * this function, that extcon device is automatically unregistered on driver - * detach. Internally this function calls extcon_dev_register() function. - * To get more information, refer that function. - * - * If extcon device is registered with this function and the device needs to be - * unregistered separately, devm_extcon_dev_unregister() should be used. - * - * Returns 0 if success or negaive error number if failure. - */ -int devm_extcon_dev_register(struct device *dev, struct extcon_dev *edev) -{ - struct extcon_dev **ptr; - int ret; - - ptr = devres_alloc(devm_extcon_dev_unreg, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return -ENOMEM; - - ret = extcon_dev_register(edev); - if (ret) { - devres_free(ptr); - return ret; - } - - *ptr = edev; - devres_add(dev, ptr); - - return 0; -} -EXPORT_SYMBOL_GPL(devm_extcon_dev_register); - -/** - * devm_extcon_dev_unregister() - Resource-managed extcon_dev_unregister() - * @dev: device the extcon belongs to - * @edev: the extcon device to unregister - * - * Unregister extcon device that is registered with devm_extcon_dev_register() - * function. 
- */ -void devm_extcon_dev_unregister(struct device *dev, struct extcon_dev *edev) -{ - WARN_ON(devres_release(dev, devm_extcon_dev_unreg, - devm_extcon_dev_match, edev)); -} -EXPORT_SYMBOL_GPL(devm_extcon_dev_unregister); - #ifdef CONFIG_OF /* * extcon_get_edev_by_phandle - Get the extcon device from devicetree @@ -1107,10 +861,12 @@ struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, int index) list_for_each_entry(edev, &extcon_dev_list, entry) { if (edev->dev.parent && edev->dev.parent->of_node == node) { mutex_unlock(&extcon_dev_list_lock); + of_node_put(node); return edev; } } mutex_unlock(&extcon_dev_list_lock); + of_node_put(node); return ERR_PTR(-EPROBE_DEFER); } diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 17ccf0a8787a..c394b81fe452 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -107,6 +107,11 @@ static int __init arm_enable_runtime_services(void) return 0; } + if (efi_enabled(EFI_RUNTIME_SERVICES)) { + pr_info("EFI runtime services access via paravirt.\n"); + return 0; + } + pr_info("Remapping and enabling EFI services.\n"); mapsize = efi.memmap.map_end - efi.memmap.map; diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index eac76a79a880..30a24d09ea6c 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -34,6 +34,7 @@ struct pstore_read_data { int *count; struct timespec *timespec; bool *compressed; + ssize_t *ecc_notice_size; char **buf; }; @@ -69,6 +70,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data) *cb_data->compressed = true; else *cb_data->compressed = false; + *cb_data->ecc_notice_size = 0; } else if (sscanf(name, "dump-type%u-%u-%d-%lu", cb_data->type, &part, &cnt, &time) == 4) { *cb_data->id = generic_id(time, part, cnt); @@ -76,6 +78,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data) cb_data->timespec->tv_sec = time; cb_data->timespec->tv_nsec = 0; *cb_data->compressed = false; + *cb_data->ecc_notice_size = 0; } else if (sscanf(name, "dump-type%u-%u-%lu", cb_data->type, &part, &time) == 3) { /* @@ -88,6 +91,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data) cb_data->timespec->tv_sec = time; cb_data->timespec->tv_nsec = 0; *cb_data->compressed = false; + *cb_data->ecc_notice_size = 0; } else return 0; @@ -210,6 +214,7 @@ static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos) static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, int *count, struct timespec *timespec, char **buf, bool *compressed, + ssize_t *ecc_notice_size, struct pstore_info *psi) { struct pstore_read_data data; @@ -220,6 +225,7 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, data.count = count; data.timespec = timespec; data.compressed = compressed; + data.ecc_notice_size = ecc_notice_size; data.buf = buf; *data.buf = kzalloc(EFIVARS_DATA_SIZE_MAX, GFP_KERNEL); @@ -393,6 +399,13 @@ static __init int efivars_pstore_init(void) static __exit void efivars_pstore_exit(void) { + if (!efi_pstore_info.bufsize) + return; + + pstore_unregister(&efi_pstore_info); + kfree(efi_pstore_info.buf); + efi_pstore_info.buf = NULL; + efi_pstore_info.bufsize = 0; } module_init(efivars_pstore_init); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 05509f3aaee8..5a2631af7410 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -24,6 +24,9 @@ #include #include #include 
+#include +#include +#include #include @@ -195,6 +198,96 @@ static void generic_ops_unregister(void) efivars_unregister(&generic_efivars); } +#if IS_ENABLED(CONFIG_ACPI) +#define EFIVAR_SSDT_NAME_MAX 16 +static char efivar_ssdt[EFIVAR_SSDT_NAME_MAX] __initdata; +static int __init efivar_ssdt_setup(char *str) +{ + if (strlen(str) < sizeof(efivar_ssdt)) + memcpy(efivar_ssdt, str, strlen(str)); + else + pr_warn("efivar_ssdt: name too long: %s\n", str); + return 0; +} +__setup("efivar_ssdt=", efivar_ssdt_setup); + +static __init int efivar_ssdt_iter(efi_char16_t *name, efi_guid_t vendor, + unsigned long name_size, void *data) +{ + struct efivar_entry *entry; + struct list_head *list = data; + char utf8_name[EFIVAR_SSDT_NAME_MAX]; + int limit = min_t(unsigned long, EFIVAR_SSDT_NAME_MAX, name_size); + + ucs2_as_utf8(utf8_name, name, limit - 1); + if (strncmp(utf8_name, efivar_ssdt, limit) != 0) + return 0; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return 0; + + memcpy(entry->var.VariableName, name, name_size); + memcpy(&entry->var.VendorGuid, &vendor, sizeof(efi_guid_t)); + + efivar_entry_add(entry, list); + + return 0; +} + +static __init int efivar_ssdt_load(void) +{ + LIST_HEAD(entries); + struct efivar_entry *entry, *aux; + unsigned long size; + void *data; + int ret; + + ret = efivar_init(efivar_ssdt_iter, &entries, true, &entries); + + list_for_each_entry_safe(entry, aux, &entries, list) { + pr_info("loading SSDT from variable %s-%pUl\n", efivar_ssdt, + &entry->var.VendorGuid); + + list_del(&entry->list); + + ret = efivar_entry_size(entry, &size); + if (ret) { + pr_err("failed to get var size\n"); + goto free_entry; + } + + data = kmalloc(size, GFP_KERNEL); + if (!data) + goto free_entry; + + ret = efivar_entry_get(entry, NULL, &size, data); + if (ret) { + pr_err("failed to get var data\n"); + goto free_data; + } + + ret = acpi_load_table(data); + if (ret) { + pr_err("failed to load table: %d\n", ret); + goto free_data; + } + + goto free_entry; + +free_data: + kfree(data); + +free_entry: + kfree(entry); + } + + return ret; +} +#else +static inline int efivar_ssdt_load(void) { return 0; } +#endif + /* * We register the efi subsystem with the firmware subsystem and the * efivars subsystem with the efi subsystem, if the system was booted with @@ -218,6 +311,9 @@ static int __init efisubsys_init(void) if (error) goto err_put; + if (efi_enabled(EFI_RUNTIME_SERVICES)) + efivar_ssdt_load(); + error = sysfs_create_group(efi_kobj, &efi_subsys_attr_group); if (error) { pr_err("efi: Sysfs attribute export failed with error %d.\n", @@ -472,12 +568,14 @@ device_initcall(efi_load_efivars); FIELD_SIZEOF(struct efi_fdt_params, field) \ } -static __initdata struct { +struct params { const char name[32]; const char propname[32]; int offset; int size; -} dt_params[] = { +}; + +static __initdata struct params fdt_params[] = { UEFI_PARAM("System Table", "linux,uefi-system-table", system_table), UEFI_PARAM("MemMap Address", "linux,uefi-mmap-start", mmap), UEFI_PARAM("MemMap Size", "linux,uefi-mmap-size", mmap_size), @@ -485,44 +583,91 @@ static __initdata struct { UEFI_PARAM("MemMap Desc. Version", "linux,uefi-mmap-desc-ver", desc_ver) }; +static __initdata struct params xen_fdt_params[] = { + UEFI_PARAM("System Table", "xen,uefi-system-table", system_table), + UEFI_PARAM("MemMap Address", "xen,uefi-mmap-start", mmap), + UEFI_PARAM("MemMap Size", "xen,uefi-mmap-size", mmap_size), + UEFI_PARAM("MemMap Desc. Size", "xen,uefi-mmap-desc-size", desc_size), + UEFI_PARAM("MemMap Desc. 
Version", "xen,uefi-mmap-desc-ver", desc_ver) +}; + +#define EFI_FDT_PARAMS_SIZE ARRAY_SIZE(fdt_params) + +static __initdata struct { + const char *uname; + const char *subnode; + struct params *params; +} dt_params[] = { + { "hypervisor", "uefi", xen_fdt_params }, + { "chosen", NULL, fdt_params }, +}; + struct param_info { int found; void *params; + const char *missing; }; -static int __init fdt_find_uefi_params(unsigned long node, const char *uname, - int depth, void *data) +static int __init __find_uefi_params(unsigned long node, + struct param_info *info, + struct params *params) { - struct param_info *info = data; const void *prop; void *dest; u64 val; int i, len; - if (depth != 1 || strcmp(uname, "chosen") != 0) - return 0; - - for (i = 0; i < ARRAY_SIZE(dt_params); i++) { - prop = of_get_flat_dt_prop(node, dt_params[i].propname, &len); - if (!prop) + for (i = 0; i < EFI_FDT_PARAMS_SIZE; i++) { + prop = of_get_flat_dt_prop(node, params[i].propname, &len); + if (!prop) { + info->missing = params[i].name; return 0; - dest = info->params + dt_params[i].offset; + } + + dest = info->params + params[i].offset; info->found++; val = of_read_number(prop, len / sizeof(u32)); - if (dt_params[i].size == sizeof(u32)) + if (params[i].size == sizeof(u32)) *(u32 *)dest = val; else *(u64 *)dest = val; if (efi_enabled(EFI_DBG)) - pr_info(" %s: 0x%0*llx\n", dt_params[i].name, - dt_params[i].size * 2, val); + pr_info(" %s: 0x%0*llx\n", params[i].name, + params[i].size * 2, val); } + return 1; } +static int __init fdt_find_uefi_params(unsigned long node, const char *uname, + int depth, void *data) +{ + struct param_info *info = data; + int i; + + for (i = 0; i < ARRAY_SIZE(dt_params); i++) { + const char *subnode = dt_params[i].subnode; + + if (depth != 1 || strcmp(uname, dt_params[i].uname) != 0) { + info->missing = dt_params[i].params[0].name; + continue; + } + + if (subnode) { + node = of_get_flat_dt_subnode_by_name(node, subnode); + if (node < 0) + return 0; + } + + return __find_uefi_params(node, info, dt_params[i].params); + } + + return 0; +} + int __init efi_get_fdt_params(struct efi_fdt_params *params) { struct param_info info; @@ -538,7 +683,7 @@ int __init efi_get_fdt_params(struct efi_fdt_params *params) pr_info("UEFI not found.\n"); else if (!ret) pr_err("Can't find '%s' in device tree!\n", - dt_params[info.found].name); + info.missing); return ret; } diff --git a/drivers/firmware/efi/efibc.c b/drivers/firmware/efi/efibc.c index 8dd0c7085e59..503bbe2a9d49 100644 --- a/drivers/firmware/efi/efibc.c +++ b/drivers/firmware/efi/efibc.c @@ -37,13 +37,13 @@ static int efibc_set_variable(const char *name, const char *value) size_t size = (strlen(value) + 1) * sizeof(efi_char16_t); if (size > sizeof(entry->var.Data)) { - pr_err("value is too large"); + pr_err("value is too large (%zu bytes) for '%s' EFI variable\n", size, name); return -EINVAL; } entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { - pr_err("failed to allocate efivar entry"); + pr_err("failed to allocate efivar entry for '%s' EFI variable\n", name); return -ENOMEM; } diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 23bef6bb73ee..41958774cde3 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -22,7 +22,16 @@ #include #include -static void efi_call_virt_check_flags(unsigned long flags, const char *call) +/* + * Wrap around the new efi_call_virt_generic() macros so that the + * code doesn't get too cluttered: + */ +#define 
efi_call_virt(f, args...) \ + efi_call_virt_pointer(efi.systab->runtime, f, args) +#define __efi_call_virt(f, args...) \ + __efi_call_virt_pointer(efi.systab->runtime, f, args) + +void efi_call_virt_check_flags(unsigned long flags, const char *call) { unsigned long cur_flags, mismatch; @@ -38,48 +47,6 @@ static void efi_call_virt_check_flags(unsigned long flags, const char *call) local_irq_restore(flags); } -/* - * Arch code can implement the following three template macros, avoiding - * reptition for the void/non-void return cases of {__,}efi_call_virt: - * - * * arch_efi_call_virt_setup - * - * Sets up the environment for the call (e.g. switching page tables, - * allowing kernel-mode use of floating point, if required). - * - * * arch_efi_call_virt - * - * Performs the call. The last expression in the macro must be the call - * itself, allowing the logic to be shared by the void and non-void - * cases. - * - * * arch_efi_call_virt_teardown - * - * Restores the usual kernel environment once the call has returned. - */ - -#define efi_call_virt(f, args...) \ -({ \ - efi_status_t __s; \ - unsigned long flags; \ - arch_efi_call_virt_setup(); \ - local_save_flags(flags); \ - __s = arch_efi_call_virt(f, args); \ - efi_call_virt_check_flags(flags, __stringify(f)); \ - arch_efi_call_virt_teardown(); \ - __s; \ -}) - -#define __efi_call_virt(f, args...) \ -({ \ - unsigned long flags; \ - arch_efi_call_virt_setup(); \ - local_save_flags(flags); \ - arch_efi_call_virt(f, args); \ - efi_call_virt_check_flags(flags, __stringify(f)); \ - arch_efi_call_virt_teardown(); \ -}) - /* * According to section 7.1 of the UEFI spec, Runtime Services are not fully * reentrant, and there are particular combinations of calls that need to be diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c index 03e04582791c..8263429e21b8 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) "psci: " fmt +#include #include #include #include @@ -256,13 +257,6 @@ static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) u32 *psci_states; struct device_node *state_node; - /* - * If the PSCI cpu_suspend function hook has not been initialized - * idle states must not be enabled, so bail out - */ - if (!psci_ops.cpu_suspend) - return -EOPNOTSUPP; - /* Count idle states */ while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", count))) { @@ -310,11 +304,69 @@ free_mem: return ret; } +#ifdef CONFIG_ACPI +#include + +static int __maybe_unused psci_acpi_cpu_init_idle(unsigned int cpu) +{ + int i, count; + u32 *psci_states; + struct acpi_lpi_state *lpi; + struct acpi_processor *pr = per_cpu(processors, cpu); + + if (unlikely(!pr || !pr->flags.has_lpi)) + return -EINVAL; + + count = pr->power.count - 1; + if (count <= 0) + return -ENODEV; + + psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); + if (!psci_states) + return -ENOMEM; + + for (i = 0; i < count; i++) { + u32 state; + + lpi = &pr->power.lpi_states[i + 1]; + /* + * Only bits[31:0] represent a PSCI power_state while + * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification + */ + state = lpi->address; + if (!psci_power_state_is_valid(state)) { + pr_warn("Invalid PSCI power state %#x\n", state); + kfree(psci_states); + return -EINVAL; + } + psci_states[i] = state; + } + /* Idle states parsed correctly, initialize per-cpu pointer */ + per_cpu(psci_power_state, cpu) = psci_states; + return 0; +} +#else +static int __maybe_unused psci_acpi_cpu_init_idle(unsigned int cpu) +{ + return 
-EINVAL; +} +#endif + int psci_cpu_init_idle(unsigned int cpu) { struct device_node *cpu_node; int ret; + /* + * If the PSCI cpu_suspend function hook has not been initialized + * idle states must not be enabled, so bail out + */ + if (!psci_ops.cpu_suspend) + return -EOPNOTSUPP; + + if (!acpi_disabled) + return psci_acpi_cpu_init_idle(cpu); + cpu_node = of_get_cpu_node(cpu, NULL); if (!cpu_node) return -ENODEV; diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index cebcb405812e..98dd47a30fc7 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -49,7 +49,7 @@ config GPIO_DEVRES config OF_GPIO def_bool y - depends on OF || COMPILE_TEST + depends on OF config GPIO_ACPI def_bool y @@ -250,7 +250,7 @@ config GPIO_LOONGSON driver for GPIO functionality on Loongson-2F/3A/3B processors. config GPIO_LPC18XX - bool "NXP LPC18XX/43XX GPIO support" + tristate "NXP LPC18XX/43XX GPIO support" default y if ARCH_LPC18XX depends on OF_GPIO && (ARCH_LPC18XX || COMPILE_TEST) help @@ -402,9 +402,12 @@ config GPIO_TB10X select OF_GPIO config GPIO_TEGRA - bool - default y + bool "NVIDIA Tegra GPIO support" + default ARCH_TEGRA depends on ARCH_TEGRA || COMPILE_TEST + depends on OF + help + Say yes here to support GPIO pins on NVIDIA Tegra SoCs. config GPIO_TS4800 tristate "TS-4800 DIO blocks and compatibles" @@ -871,6 +874,15 @@ config GPIO_LP3943 LP3943 can be used as a GPIO expander which provides up to 16 GPIOs. Open drain outputs are required for this usage. +config GPIO_MAX77620 + tristate "GPIO support for PMIC MAX77620 and MAX20024" + depends on MFD_MAX77620 + help + GPIO driver for MAX77620 and MAX20024 PMIC from Maxim Semiconductor. + MAX77620 PMIC has 8 pins that can be configured as GPIOs. The + driver also provides interrupt support for each of the gpios. + Say yes here to enable the max77620 to be used as gpio controller. + config GPIO_MSIC bool "Intel MSIC mixed signal gpio support" depends on MFD_INTEL_MSIC @@ -1026,11 +1038,18 @@ config GPIO_BT8XX If unsure, say N. config GPIO_INTEL_MID - bool "Intel Mid GPIO support" - depends on X86 + bool "Intel MID GPIO support" + depends on X86_INTEL_MID + select GPIOLIB_IRQCHIP + help + Say Y here to support Intel MID GPIO. + +config GPIO_MERRIFIELD + tristate "Intel Merrifield GPIO support" + depends on X86_INTEL_MID select GPIOLIB_IRQCHIP help - Say Y here to support Intel Mid GPIO. + Say Y here to support Intel Merrifield GPIO. 
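Both Intel MID entries above select GPIOLIB_IRQCHIP, the gpiolib helper layer for controllers whose pins also act as interrupt sources. What follows is a minimal sketch of that pattern, not any particular driver's code: the foo_* names, the 8-pin bank and the single parent interrupt are assumptions made only to illustrate the call sequence.

#include <linux/gpio/driver.h>
#include <linux/irq.h>
#include <linux/platform_device.h>

/* Hypothetical driver state; only the members used below are shown. */
struct foo_gpio {
	struct gpio_chip chip;
	int parent_irq;
};

/* Hypothetical irqchip; a real driver supplies mask/unmask callbacks. */
static struct irq_chip foo_irq_chip = {
	.name = "foo-gpio",
};

static void foo_gpio_irq_handler(struct irq_desc *desc)
{
	/* Read the hardware status here and demux into per-line virqs. */
}

static int foo_gpio_probe(struct platform_device *pdev)
{
	struct foo_gpio *priv;
	int ret;

	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	priv->parent_irq = platform_get_irq(pdev, 0);
	if (priv->parent_irq < 0)
		return priv->parent_irq;

	priv->chip.label = dev_name(&pdev->dev);
	priv->chip.parent = &pdev->dev;
	priv->chip.base = -1;
	priv->chip.ngpio = 8;

	/* Register the gpio_chip; devres unregisters it on driver detach. */
	ret = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv);
	if (ret)
		return ret;

	/* Add an irqchip so every GPIO line gets its own Linux IRQ. */
	ret = gpiochip_irqchip_add(&priv->chip, &foo_irq_chip, 0,
				   handle_simple_irq, IRQ_TYPE_NONE);
	if (ret)
		return ret;

	/* Chain the bank's parent interrupt into the demux handler. */
	gpiochip_set_chained_irqchip(&priv->chip, &foo_irq_chip,
				     priv->parent_irq, foo_gpio_irq_handler);

	return 0;
}

With the chained setup the parent interrupt is consumed entirely by the demux handler and never appears as a device interrupt of its own; each pin instead gets a dedicated virtual IRQ that peripheral drivers can request normally.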
config GPIO_ML_IOH tristate "OKI SEMICONDUCTOR ML7213 IOH GPIO support" diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 991598ea3fba..2a035ed8f168 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -61,8 +61,10 @@ obj-$(CONFIG_GPIO_MAX730X) += gpio-max730x.o obj-$(CONFIG_GPIO_MAX7300) += gpio-max7300.o obj-$(CONFIG_GPIO_MAX7301) += gpio-max7301.o obj-$(CONFIG_GPIO_MAX732X) += gpio-max732x.o +obj-$(CONFIG_GPIO_MAX77620) += gpio-max77620.o obj-$(CONFIG_GPIO_MB86S7X) += gpio-mb86s7x.o obj-$(CONFIG_GPIO_MENZ127) += gpio-menz127.o +obj-$(CONFIG_GPIO_MERRIFIELD) += gpio-merrifield.o obj-$(CONFIG_GPIO_MC33880) += gpio-mc33880.o obj-$(CONFIG_GPIO_MC9S08DZ60) += gpio-mc9s08dz60.o obj-$(CONFIG_GPIO_MCP23S08) += gpio-mcp23s08.o diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c index 6c75c83baf5a..2d2763ea1a68 100644 --- a/drivers/gpio/gpio-104-idi-48.c +++ b/drivers/gpio/gpio-104-idi-48.c @@ -247,6 +247,7 @@ static int idi_48_probe(struct device *dev, unsigned int id) idi48gpio->irq = irq[id]; spin_lock_init(&idi48gpio->lock); + spin_lock_init(&idi48gpio->ack_lock); dev_set_drvdata(dev, idi48gpio); diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index 80f9ddf13343..a6607faf2fdf 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -35,13 +35,8 @@ struct gen_74x164_chip { static int __gen_74x164_write_config(struct gen_74x164_chip *chip) { - struct spi_transfer xfer = { - .tx_buf = chip->buffer, - .len = chip->registers, - }; - - return spi_sync_transfer(to_spi_device(chip->gpio_chip.parent), - &xfer, 1); + return spi_write(to_spi_device(chip->gpio_chip.parent), chip->buffer, + chip->registers); } static int gen_74x164_get_value(struct gpio_chip *gc, unsigned offset) diff --git a/drivers/gpio/gpio-clps711x.c b/drivers/gpio/gpio-clps711x.c index 5a690256af9b..52fd63f02134 100644 --- a/drivers/gpio/gpio-clps711x.c +++ b/drivers/gpio/gpio-clps711x.c @@ -20,8 +20,12 @@ static int clps711x_gpio_probe(struct platform_device *pdev) void __iomem *dat, *dir; struct gpio_chip *gc; struct resource *res; - int err, id = np ? of_alias_get_id(np, "gpio") : pdev->id; + int err, id; + if (!np) + return -ENODEV; + + id = of_alias_get_id(np, "gpio"); if ((id < 0) || (id > 4)) return -ENODEV; @@ -63,7 +67,7 @@ static int clps711x_gpio_probe(struct platform_device *pdev) break; } - gc->base = id * 8; + gc->base = -1; gc->owner = THIS_MODULE; platform_set_drvdata(pdev, gc); @@ -71,7 +75,7 @@ static int clps711x_gpio_probe(struct platform_device *pdev) } static const struct of_device_id __maybe_unused clps711x_gpio_ids[] = { - { .compatible = "cirrus,clps711x-gpio" }, + { .compatible = "cirrus,ep7209-gpio" }, { } }; MODULE_DEVICE_TABLE(of, clps711x_gpio_ids); diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 34779bb375de..6193f62c0df4 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -486,6 +486,7 @@ dwapb_gpio_get_pdata(struct device *dev) pp->idx >= DWAPB_MAX_PORTS) { dev_err(dev, "missing/invalid port index for port%d\n", i); + fwnode_handle_put(fwnode); return ERR_PTR(-EINVAL); } diff --git a/drivers/gpio/gpio-f7188x.c b/drivers/gpio/gpio-f7188x.c index 05aa538c3767..600be8418707 100644 --- a/drivers/gpio/gpio-f7188x.c +++ b/drivers/gpio/gpio-f7188x.c @@ -125,6 +125,7 @@ static inline void superio_exit(int base) * GPIO chip. 
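+ * + * The get_direction() callback reads the bank's direction register through + * the Super-I/O window; a set bit configures the pin as an output, so the + * inverted bit is returned (1 = input, 0 = output).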
*/ +static int f7188x_gpio_get_direction(struct gpio_chip *chip, unsigned offset); static int f7188x_gpio_direction_in(struct gpio_chip *chip, unsigned offset); static int f7188x_gpio_get(struct gpio_chip *chip, unsigned offset); static int f7188x_gpio_direction_out(struct gpio_chip *chip, @@ -139,6 +140,7 @@ static int f7188x_gpio_set_single_ended(struct gpio_chip *gc, .chip = { \ .label = DRVNAME, \ .owner = THIS_MODULE, \ + .get_direction = f7188x_gpio_get_direction, \ .direction_input = f7188x_gpio_direction_in, \ .get = f7188x_gpio_get, \ .direction_output = f7188x_gpio_direction_out, \ @@ -209,6 +211,26 @@ static struct f7188x_gpio_bank f81866_gpio_bank[] = { F7188X_GPIO_BANK(80, 8, 0x88), }; +static int f7188x_gpio_get_direction(struct gpio_chip *chip, unsigned offset) +{ + int err; + struct f7188x_gpio_bank *bank = + container_of(chip, struct f7188x_gpio_bank, chip); + struct f7188x_sio *sio = bank->data->sio; + u8 dir; + + err = superio_enter(sio->addr); + if (err) + return err; + superio_select(sio->addr, SIO_LD_GPIO); + + dir = superio_inb(sio->addr, gpio_dir(bank->regbase)); + + superio_exit(sio->addr); + + return !(dir & 1 << offset); +} + static int f7188x_gpio_direction_in(struct gpio_chip *chip, unsigned offset) { int err; diff --git a/drivers/gpio/gpio-intel-mid.c b/drivers/gpio/gpio-intel-mid.c index cdaba13cb8e8..164de64b11fc 100644 --- a/drivers/gpio/gpio-intel-mid.c +++ b/drivers/gpio/gpio-intel-mid.c @@ -1,7 +1,7 @@ /* * Intel MID GPIO driver * - * Copyright (c) 2008-2014 Intel Corporation. + * Copyright (c) 2008-2014,2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,21 +17,20 @@ * Moorestown platform Langwell chip. * Medfield platform Penwell chip. * Clovertrail platform Cloverview chip. - * Merrifield platform Tangier chip. 
*/ -#include -#include -#include -#include #include -#include -#include #include +#include #include #include -#include +#include +#include +#include +#include #include +#include +#include #define INTEL_MID_IRQ_TYPE_EDGE (1 << 0) #define INTEL_MID_IRQ_TYPE_LEVEL (1 << 1) @@ -64,10 +63,6 @@ enum GPIO_REG { /* intel_mid gpio driver data */ struct intel_mid_gpio_ddata { u16 ngpio; /* number of gpio pins */ - u32 gplr_offset; /* offset of first GPLR register from base */ - u32 flis_base; /* base address of FLIS registers */ - u32 flis_len; /* length of FLIS registers */ - u32 (*get_flis_offset)(int gpio); u32 chip_irq_type; /* chip interrupt type */ }; @@ -252,15 +247,6 @@ static const struct intel_mid_gpio_ddata gpio_cloverview_core = { .chip_irq_type = INTEL_MID_IRQ_TYPE_EDGE, }; -static const struct intel_mid_gpio_ddata gpio_tangier = { - .ngpio = 192, - .gplr_offset = 4, - .flis_base = 0xff0c0000, - .flis_len = 0x8000, - .get_flis_offset = NULL, - .chip_irq_type = INTEL_MID_IRQ_TYPE_EDGE, -}; - static const struct pci_device_id intel_gpio_ids[] = { { /* Lincroft */ @@ -287,11 +273,6 @@ static const struct pci_device_id intel_gpio_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x08f7), .driver_data = (kernel_ulong_t)&gpio_cloverview_core, }, - { - /* Tangier */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x1199), - .driver_data = (kernel_ulong_t)&gpio_tangier, - }, { 0 } }; MODULE_DEVICE_TABLE(pci, intel_gpio_ids); @@ -401,7 +382,7 @@ static int intel_gpio_probe(struct pci_dev *pdev, spin_lock_init(&priv->lock); pci_set_drvdata(pdev, priv); - retval = gpiochip_add_data(&priv->chip, priv); + retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv); if (retval) { dev_err(&pdev->dev, "gpiochip_add error %d\n", retval); return retval; diff --git a/drivers/gpio/gpio-lynxpoint.c b/drivers/gpio/gpio-lynxpoint.c index 9df015e85ad9..fbd393b46ce0 100644 --- a/drivers/gpio/gpio-lynxpoint.c +++ b/drivers/gpio/gpio-lynxpoint.c @@ -383,7 +383,6 @@ static int lp_gpio_probe(struct platform_device *pdev) handle_simple_irq, IRQ_TYPE_NONE); if (ret) { dev_err(dev, "failed to add irqchip\n"); - gpiochip_remove(gc); return ret; } diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c new file mode 100644 index 000000000000..b46b436cb97f --- /dev/null +++ b/drivers/gpio/gpio-max77620.c @@ -0,0 +1,315 @@ +/* + * MAXIM MAX77620 GPIO driver + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include + +#define GPIO_REG_ADDR(offset) (MAX77620_REG_GPIO0 + offset) + +struct max77620_gpio { + struct gpio_chip gpio_chip; + struct regmap *rmap; + struct device *dev; + int gpio_irq; + int irq_base; + int gpio_base; +}; + +static const struct regmap_irq max77620_gpio_irqs[] = { + [0] = { + .mask = MAX77620_IRQ_LVL2_GPIO_EDGE0, + .type_rising_mask = MAX77620_CNFG_GPIO_INT_RISING, + .type_falling_mask = MAX77620_CNFG_GPIO_INT_FALLING, + .reg_offset = 0, + .type_reg_offset = 0, + }, + [1] = { + .mask = MAX77620_IRQ_LVL2_GPIO_EDGE1, + .type_rising_mask = MAX77620_CNFG_GPIO_INT_RISING, + .type_falling_mask = MAX77620_CNFG_GPIO_INT_FALLING, + .reg_offset = 0, + .type_reg_offset = 1, + }, + [2] = { + .mask = MAX77620_IRQ_LVL2_GPIO_EDGE2, + .type_rising_mask = MAX77620_CNFG_GPIO_INT_RISING, + .type_falling_mask = MAX77620_CNFG_GPIO_INT_FALLING, + .reg_offset = 0, + .type_reg_offset = 2, + }, + [3] = { + .mask = MAX77620_IRQ_LVL2_GPIO_EDGE3, + .type_rising_mask = MAX77620_CNFG_GPIO_INT_RISING, + .type_falling_mask = MAX77620_CNFG_GPIO_INT_FALLING, + .reg_offset = 0, + .type_reg_offset = 3, + }, + [4] = { + .mask = MAX77620_IRQ_LVL2_GPIO_EDGE4, + .type_rising_mask = MAX77620_CNFG_GPIO_INT_RISING, + .type_falling_mask = MAX77620_CNFG_GPIO_INT_FALLING, + .reg_offset = 0, + .type_reg_offset = 4, + }, + [5] = { + .mask = MAX77620_IRQ_LVL2_GPIO_EDGE5, + .type_rising_mask = MAX77620_CNFG_GPIO_INT_RISING, + .type_falling_mask = MAX77620_CNFG_GPIO_INT_FALLING, + .reg_offset = 0, + .type_reg_offset = 5, + }, + [6] = { + .mask = MAX77620_IRQ_LVL2_GPIO_EDGE6, + .type_rising_mask = MAX77620_CNFG_GPIO_INT_RISING, + .type_falling_mask = MAX77620_CNFG_GPIO_INT_FALLING, + .reg_offset = 0, + .type_reg_offset = 6, + }, + [7] = { + .mask = MAX77620_IRQ_LVL2_GPIO_EDGE7, + .type_rising_mask = MAX77620_CNFG_GPIO_INT_RISING, + .type_falling_mask = MAX77620_CNFG_GPIO_INT_FALLING, + .reg_offset = 0, + .type_reg_offset = 7, + }, +}; + +static struct regmap_irq_chip max77620_gpio_irq_chip = { + .name = "max77620-gpio", + .irqs = max77620_gpio_irqs, + .num_irqs = ARRAY_SIZE(max77620_gpio_irqs), + .num_regs = 1, + .num_type_reg = 8, + .irq_reg_stride = 1, + .type_reg_stride = 1, + .status_base = MAX77620_REG_IRQ_LVL2_GPIO, + .type_base = MAX77620_REG_GPIO0, +}; + +static int max77620_gpio_dir_input(struct gpio_chip *gc, unsigned int offset) +{ + struct max77620_gpio *mgpio = gpiochip_get_data(gc); + int ret; + + ret = regmap_update_bits(mgpio->rmap, GPIO_REG_ADDR(offset), + MAX77620_CNFG_GPIO_DIR_MASK, + MAX77620_CNFG_GPIO_DIR_INPUT); + if (ret < 0) + dev_err(mgpio->dev, "CNFG_GPIOx dir update failed: %d\n", ret); + + return ret; +} + +static int max77620_gpio_get(struct gpio_chip *gc, unsigned int offset) +{ + struct max77620_gpio *mgpio = gpiochip_get_data(gc); + unsigned int val; + int ret; + + ret = regmap_read(mgpio->rmap, GPIO_REG_ADDR(offset), &val); + if (ret < 0) { + dev_err(mgpio->dev, "CNFG_GPIOx read failed: %d\n", ret); + return ret; + } + + if (val & MAX77620_CNFG_GPIO_DIR_MASK) + return !!(val & MAX77620_CNFG_GPIO_INPUT_VAL_MASK); + else + return !!(val & MAX77620_CNFG_GPIO_OUTPUT_VAL_MASK); +} + +static int max77620_gpio_dir_output(struct gpio_chip *gc, unsigned int offset, + int value) +{ + struct max77620_gpio *mgpio = gpiochip_get_data(gc); + u8 val; + int ret; + + val = (value) ? 
MAX77620_CNFG_GPIO_OUTPUT_VAL_HIGH : + MAX77620_CNFG_GPIO_OUTPUT_VAL_LOW; + + ret = regmap_update_bits(mgpio->rmap, GPIO_REG_ADDR(offset), + MAX77620_CNFG_GPIO_OUTPUT_VAL_MASK, val); + if (ret < 0) { + dev_err(mgpio->dev, "CNFG_GPIOx val update failed: %d\n", ret); + return ret; + } + + ret = regmap_update_bits(mgpio->rmap, GPIO_REG_ADDR(offset), + MAX77620_CNFG_GPIO_DIR_MASK, + MAX77620_CNFG_GPIO_DIR_OUTPUT); + if (ret < 0) + dev_err(mgpio->dev, "CNFG_GPIOx dir update failed: %d\n", ret); + + return ret; +} + +static int max77620_gpio_set_debounce(struct gpio_chip *gc, + unsigned int offset, + unsigned int debounce) +{ + struct max77620_gpio *mgpio = gpiochip_get_data(gc); + u8 val; + int ret; + + switch (debounce) { + case 0: + val = MAX77620_CNFG_GPIO_DBNC_None; + break; + case 1 ... 8: + val = MAX77620_CNFG_GPIO_DBNC_8ms; + break; + case 9 ... 16: + val = MAX77620_CNFG_GPIO_DBNC_16ms; + break; + case 17 ... 32: + val = MAX77620_CNFG_GPIO_DBNC_32ms; + break; + default: + dev_err(mgpio->dev, "Illegal value %u\n", debounce); + return -EINVAL; + } + + ret = regmap_update_bits(mgpio->rmap, GPIO_REG_ADDR(offset), + MAX77620_CNFG_GPIO_DBNC_MASK, val); + if (ret < 0) + dev_err(mgpio->dev, "CNFG_GPIOx_DBNC update failed: %d\n", ret); + + return ret; +} + +static void max77620_gpio_set(struct gpio_chip *gc, unsigned int offset, + int value) +{ + struct max77620_gpio *mgpio = gpiochip_get_data(gc); + u8 val; + int ret; + + val = (value) ? MAX77620_CNFG_GPIO_OUTPUT_VAL_HIGH : + MAX77620_CNFG_GPIO_OUTPUT_VAL_LOW; + + ret = regmap_update_bits(mgpio->rmap, GPIO_REG_ADDR(offset), + MAX77620_CNFG_GPIO_OUTPUT_VAL_MASK, val); + if (ret < 0) + dev_err(mgpio->dev, "CNFG_GPIO_OUT update failed: %d\n", ret); +} + +static int max77620_gpio_set_single_ended(struct gpio_chip *gc, + unsigned int offset, + enum single_ended_mode mode) +{ + struct max77620_gpio *mgpio = gpiochip_get_data(gc); + + switch (mode) { + case LINE_MODE_OPEN_DRAIN: + return regmap_update_bits(mgpio->rmap, GPIO_REG_ADDR(offset), + MAX77620_CNFG_GPIO_DRV_MASK, + MAX77620_CNFG_GPIO_DRV_OPENDRAIN); + case LINE_MODE_PUSH_PULL: + return regmap_update_bits(mgpio->rmap, GPIO_REG_ADDR(offset), + MAX77620_CNFG_GPIO_DRV_MASK, + MAX77620_CNFG_GPIO_DRV_PUSHPULL); + default: + break; + } + + return -ENOTSUPP; +} + +static int max77620_gpio_to_irq(struct gpio_chip *gc, unsigned int offset) +{ + struct max77620_gpio *mgpio = gpiochip_get_data(gc); + struct max77620_chip *chip = dev_get_drvdata(mgpio->dev->parent); + + return regmap_irq_get_virq(chip->gpio_irq_data, offset); +} + +static int max77620_gpio_probe(struct platform_device *pdev) +{ + struct max77620_chip *chip = dev_get_drvdata(pdev->dev.parent); + struct max77620_gpio *mgpio; + int gpio_irq; + int ret; + + gpio_irq = platform_get_irq(pdev, 0); + if (gpio_irq <= 0) { + dev_err(&pdev->dev, "GPIO irq not available %d\n", gpio_irq); + return -ENODEV; + } + + mgpio = devm_kzalloc(&pdev->dev, sizeof(*mgpio), GFP_KERNEL); + if (!mgpio) + return -ENOMEM; + + mgpio->rmap = chip->rmap; + mgpio->dev = &pdev->dev; + mgpio->gpio_irq = gpio_irq; + + mgpio->gpio_chip.label = pdev->name; + mgpio->gpio_chip.parent = &pdev->dev; + mgpio->gpio_chip.direction_input = max77620_gpio_dir_input; + mgpio->gpio_chip.get = max77620_gpio_get; + mgpio->gpio_chip.direction_output = max77620_gpio_dir_output; + mgpio->gpio_chip.set_debounce = max77620_gpio_set_debounce; + mgpio->gpio_chip.set = max77620_gpio_set; + mgpio->gpio_chip.set_single_ended = max77620_gpio_set_single_ended; + mgpio->gpio_chip.to_irq = 
max77620_gpio_to_irq; + mgpio->gpio_chip.ngpio = MAX77620_GPIO_NR; + mgpio->gpio_chip.can_sleep = 1; + mgpio->gpio_chip.base = -1; + mgpio->irq_base = -1; +#ifdef CONFIG_OF_GPIO + mgpio->gpio_chip.of_node = pdev->dev.parent->of_node; +#endif + + platform_set_drvdata(pdev, mgpio); + + ret = devm_gpiochip_add_data(&pdev->dev, &mgpio->gpio_chip, mgpio); + if (ret < 0) { + dev_err(&pdev->dev, "gpio_init: Failed to add max77620_gpio\n"); + return ret; + } + + mgpio->gpio_base = mgpio->gpio_chip.base; + ret = devm_regmap_add_irq_chip(&pdev->dev, chip->rmap, mgpio->gpio_irq, + IRQF_ONESHOT, mgpio->irq_base, + &max77620_gpio_irq_chip, + &chip->gpio_irq_data); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to add gpio irq_chip %d\n", ret); + return ret; + } + + return 0; +} + +static const struct platform_device_id max77620_gpio_devtype[] = { + { .name = "max77620-gpio", }, + {}, +}; +MODULE_DEVICE_TABLE(platform, max77620_gpio_devtype); + +static struct platform_driver max77620_gpio_driver = { + .driver.name = "max77620-gpio", + .probe = max77620_gpio_probe, + .id_table = max77620_gpio_devtype, +}; + +module_platform_driver(max77620_gpio_driver); + +MODULE_DESCRIPTION("GPIO interface for MAX77620 and MAX20024 PMIC"); +MODULE_AUTHOR("Laxman Dewangan "); +MODULE_AUTHOR("Chaitanya Bandi "); +MODULE_ALIAS("platform:max77620-gpio"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpio/gpio-menz127.c b/drivers/gpio/gpio-menz127.c index cc103aff45e4..a1210e330571 100644 --- a/drivers/gpio/gpio-menz127.c +++ b/drivers/gpio/gpio-menz127.c @@ -187,7 +187,6 @@ MODULE_DEVICE_TABLE(mcb, men_z127_ids); static struct mcb_driver men_z127_driver = { .driver = { .name = "z127-gpio", - .owner = THIS_MODULE, }, .probe = men_z127_probe, .remove = men_z127_remove, diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c new file mode 100644 index 000000000000..45b51278b8ee --- /dev/null +++ b/drivers/gpio/gpio-merrifield.c @@ -0,0 +1,444 @@ +/* + * Intel Merrifield SoC GPIO driver + * + * Copyright (c) 2016 Intel Corporation. + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define GCCR 0x000 /* controller configuration */ +#define GPLR 0x004 /* pin level r/o */ +#define GPDR 0x01c /* pin direction */ +#define GPSR 0x034 /* pin set w/o */ +#define GPCR 0x04c /* pin clear w/o */ +#define GRER 0x064 /* rising edge detect */ +#define GFER 0x07c /* falling edge detect */ +#define GFBR 0x094 /* glitch filter bypass */ +#define GIMR 0x0ac /* interrupt mask */ +#define GISR 0x0c4 /* interrupt source */ +#define GITR 0x300 /* input type */ +#define GLPR 0x318 /* level input polarity */ +#define GWMR 0x400 /* wake mask */ +#define GWSR 0x418 /* wake source */ +#define GSIR 0xc00 /* secure input */ + +/* Intel Merrifield has 192 GPIO pins */ +#define MRFLD_NGPIO 192 + +struct mrfld_gpio_pinrange { + unsigned int gpio_base; + unsigned int pin_base; + unsigned int npins; +}; + +#define GPIO_PINRANGE(gstart, gend, pstart) \ + { \ + .gpio_base = (gstart), \ + .pin_base = (pstart), \ + .npins = (gend) - (gstart) + 1, \ + } + +struct mrfld_gpio { + struct gpio_chip chip; + void __iomem *reg_base; + raw_spinlock_t lock; + struct device *dev; +}; + +static const struct mrfld_gpio_pinrange mrfld_gpio_ranges[] = { + GPIO_PINRANGE(0, 11, 146), + GPIO_PINRANGE(12, 13, 144), + GPIO_PINRANGE(14, 15, 35), + GPIO_PINRANGE(16, 16, 164), + GPIO_PINRANGE(17, 18, 105), + GPIO_PINRANGE(19, 22, 101), + GPIO_PINRANGE(23, 30, 107), + GPIO_PINRANGE(32, 43, 67), + GPIO_PINRANGE(44, 63, 195), + GPIO_PINRANGE(64, 67, 140), + GPIO_PINRANGE(68, 69, 165), + GPIO_PINRANGE(70, 71, 65), + GPIO_PINRANGE(72, 76, 228), + GPIO_PINRANGE(77, 86, 37), + GPIO_PINRANGE(87, 87, 48), + GPIO_PINRANGE(88, 88, 47), + GPIO_PINRANGE(89, 96, 49), + GPIO_PINRANGE(97, 97, 34), + GPIO_PINRANGE(102, 119, 83), + GPIO_PINRANGE(120, 123, 79), + GPIO_PINRANGE(124, 135, 115), + GPIO_PINRANGE(137, 142, 158), + GPIO_PINRANGE(154, 163, 24), + GPIO_PINRANGE(164, 176, 215), + GPIO_PINRANGE(177, 189, 127), + GPIO_PINRANGE(190, 191, 178), +}; + +static void __iomem *gpio_reg(struct gpio_chip *chip, unsigned int offset, + unsigned int reg_type_offset) +{ + struct mrfld_gpio *priv = gpiochip_get_data(chip); + u8 reg = offset / 32; + + return priv->reg_base + reg_type_offset + reg * 4; +} + +static int mrfld_gpio_get(struct gpio_chip *chip, unsigned int offset) +{ + void __iomem *gplr = gpio_reg(chip, offset, GPLR); + + return !!(readl(gplr) & BIT(offset % 32)); +} + +static void mrfld_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) +{ + struct mrfld_gpio *priv = gpiochip_get_data(chip); + void __iomem *gpsr, *gpcr; + unsigned long flags; + + raw_spin_lock_irqsave(&priv->lock, flags); + + if (value) { + gpsr = gpio_reg(chip, offset, GPSR); + writel(BIT(offset % 32), gpsr); + } else { + gpcr = gpio_reg(chip, offset, GPCR); + writel(BIT(offset % 32), gpcr); + } + + raw_spin_unlock_irqrestore(&priv->lock, flags); +} + +static int mrfld_gpio_direction_input(struct gpio_chip *chip, + unsigned int offset) +{ + struct mrfld_gpio *priv = gpiochip_get_data(chip); + void __iomem *gpdr = gpio_reg(chip, offset, GPDR); + unsigned long flags; + u32 value; + + raw_spin_lock_irqsave(&priv->lock, flags); + + value = readl(gpdr); + value &= ~BIT(offset % 32); + writel(value, gpdr); + + raw_spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +static int mrfld_gpio_direction_output(struct gpio_chip *chip, + unsigned int offset, int value) +{ + struct mrfld_gpio *priv = gpiochip_get_data(chip); + void __iomem *gpdr = gpio_reg(chip, offset, 
GPDR); + unsigned long flags; + + mrfld_gpio_set(chip, offset, value); + + raw_spin_lock_irqsave(&priv->lock, flags); + + value = readl(gpdr); + value |= BIT(offset % 32); + writel(value, gpdr); + + raw_spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +static void mrfld_irq_ack(struct irq_data *d) +{ + struct mrfld_gpio *priv = irq_data_get_irq_chip_data(d); + u32 gpio = irqd_to_hwirq(d); + void __iomem *gisr = gpio_reg(&priv->chip, gpio, GISR); + unsigned long flags; + + raw_spin_lock_irqsave(&priv->lock, flags); + + writel(BIT(gpio % 32), gisr); + + raw_spin_unlock_irqrestore(&priv->lock, flags); +} + +static void mrfld_irq_unmask_mask(struct irq_data *d, bool unmask) +{ + struct mrfld_gpio *priv = irq_data_get_irq_chip_data(d); + u32 gpio = irqd_to_hwirq(d); + void __iomem *gimr = gpio_reg(&priv->chip, gpio, GIMR); + unsigned long flags; + u32 value; + + raw_spin_lock_irqsave(&priv->lock, flags); + + if (unmask) + value = readl(gimr) | BIT(gpio % 32); + else + value = readl(gimr) & ~BIT(gpio % 32); + writel(value, gimr); + + raw_spin_unlock_irqrestore(&priv->lock, flags); +} + +static void mrfld_irq_mask(struct irq_data *d) +{ + mrfld_irq_unmask_mask(d, false); +} + +static void mrfld_irq_unmask(struct irq_data *d) +{ + mrfld_irq_unmask_mask(d, true); +} + +static int mrfld_irq_set_type(struct irq_data *d, unsigned int type) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct mrfld_gpio *priv = gpiochip_get_data(gc); + u32 gpio = irqd_to_hwirq(d); + void __iomem *grer = gpio_reg(&priv->chip, gpio, GRER); + void __iomem *gfer = gpio_reg(&priv->chip, gpio, GFER); + void __iomem *gitr = gpio_reg(&priv->chip, gpio, GITR); + void __iomem *glpr = gpio_reg(&priv->chip, gpio, GLPR); + unsigned long flags; + u32 value; + + raw_spin_lock_irqsave(&priv->lock, flags); + + if (type & IRQ_TYPE_EDGE_RISING) + value = readl(grer) | BIT(gpio % 32); + else + value = readl(grer) & ~BIT(gpio % 32); + writel(value, grer); + + if (type & IRQ_TYPE_EDGE_FALLING) + value = readl(gfer) | BIT(gpio % 32); + else + value = readl(gfer) & ~BIT(gpio % 32); + writel(value, gfer); + + /* + * To prevent glitches from triggering an unintended level interrupt, + * configure GLPR register first and then configure GITR. + */ + if (type & IRQ_TYPE_LEVEL_LOW) + value = readl(glpr) | BIT(gpio % 32); + else + value = readl(glpr) & ~BIT(gpio % 32); + writel(value, glpr); + + if (type & IRQ_TYPE_LEVEL_MASK) { + value = readl(gitr) | BIT(gpio % 32); + writel(value, gitr); + + irq_set_handler_locked(d, handle_level_irq); + } else if (type & IRQ_TYPE_EDGE_BOTH) { + value = readl(gitr) & ~BIT(gpio % 32); + writel(value, gitr); + + irq_set_handler_locked(d, handle_edge_irq); + } + + raw_spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +static int mrfld_irq_set_wake(struct irq_data *d, unsigned int on) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct mrfld_gpio *priv = gpiochip_get_data(gc); + u32 gpio = irqd_to_hwirq(d); + void __iomem *gwmr = gpio_reg(&priv->chip, gpio, GWMR); + void __iomem *gwsr = gpio_reg(&priv->chip, gpio, GWSR); + unsigned long flags; + u32 value; + + raw_spin_lock_irqsave(&priv->lock, flags); + + /* Clear the existing wake status */ + writel(BIT(gpio % 32), gwsr); + + if (on) + value = readl(gwmr) | BIT(gpio % 32); + else + value = readl(gwmr) & ~BIT(gpio % 32); + writel(value, gwmr); + + raw_spin_unlock_irqrestore(&priv->lock, flags); + + dev_dbg(priv->dev, "%sable wake for gpio %u\n", on ? 
"en" : "dis", gpio); + return 0; +} + +static struct irq_chip mrfld_irqchip = { + .name = "gpio-merrifield", + .irq_ack = mrfld_irq_ack, + .irq_mask = mrfld_irq_mask, + .irq_unmask = mrfld_irq_unmask, + .irq_set_type = mrfld_irq_set_type, + .irq_set_wake = mrfld_irq_set_wake, +}; + +static void mrfld_irq_handler(struct irq_desc *desc) +{ + struct gpio_chip *gc = irq_desc_get_handler_data(desc); + struct mrfld_gpio *priv = gpiochip_get_data(gc); + struct irq_chip *irqchip = irq_desc_get_chip(desc); + unsigned long base, gpio; + + chained_irq_enter(irqchip, desc); + + /* Check GPIO controller to check which pin triggered the interrupt */ + for (base = 0; base < priv->chip.ngpio; base += 32) { + void __iomem *gisr = gpio_reg(&priv->chip, base, GISR); + void __iomem *gimr = gpio_reg(&priv->chip, base, GIMR); + unsigned long pending, enabled; + + pending = readl(gisr); + enabled = readl(gimr); + + /* Only interrupts that are enabled */ + pending &= enabled; + + for_each_set_bit(gpio, &pending, 32) { + unsigned int irq; + + irq = irq_find_mapping(gc->irqdomain, base + gpio); + generic_handle_irq(irq); + } + } + + chained_irq_exit(irqchip, desc); +} + +static void mrfld_irq_init_hw(struct mrfld_gpio *priv) +{ + void __iomem *reg; + unsigned int base; + + for (base = 0; base < priv->chip.ngpio; base += 32) { + /* Clear the rising-edge detect register */ + reg = gpio_reg(&priv->chip, base, GRER); + writel(0, reg); + /* Clear the falling-edge detect register */ + reg = gpio_reg(&priv->chip, base, GFER); + writel(0, reg); + } +} + +static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + const struct mrfld_gpio_pinrange *range; + struct mrfld_gpio *priv; + u32 gpio_base, irq_base; + void __iomem *base; + unsigned int i; + int retval; + + retval = pcim_enable_device(pdev); + if (retval) + return retval; + + retval = pcim_iomap_regions(pdev, BIT(1) | BIT(0), pci_name(pdev)); + if (retval) { + dev_err(&pdev->dev, "I/O memory mapping error\n"); + return retval; + } + + base = pcim_iomap_table(pdev)[1]; + + irq_base = readl(base); + gpio_base = readl(sizeof(u32) + base); + + /* Release the IO mapping, since we already get the info from BAR1 */ + pcim_iounmap_regions(pdev, BIT(1)); + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) { + dev_err(&pdev->dev, "can't allocate chip data\n"); + return -ENOMEM; + } + + priv->dev = &pdev->dev; + priv->reg_base = pcim_iomap_table(pdev)[0]; + + priv->chip.label = dev_name(&pdev->dev); + priv->chip.parent = &pdev->dev; + priv->chip.request = gpiochip_generic_request; + priv->chip.free = gpiochip_generic_free; + priv->chip.direction_input = mrfld_gpio_direction_input; + priv->chip.direction_output = mrfld_gpio_direction_output; + priv->chip.get = mrfld_gpio_get; + priv->chip.set = mrfld_gpio_set; + priv->chip.base = gpio_base; + priv->chip.ngpio = MRFLD_NGPIO; + priv->chip.can_sleep = false; + + raw_spin_lock_init(&priv->lock); + + pci_set_drvdata(pdev, priv); + retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv); + if (retval) { + dev_err(&pdev->dev, "gpiochip_add error %d\n", retval); + return retval; + } + + for (i = 0; i < ARRAY_SIZE(mrfld_gpio_ranges); i++) { + range = &mrfld_gpio_ranges[i]; + retval = gpiochip_add_pin_range(&priv->chip, + "pinctrl-merrifield", + range->gpio_base, + range->pin_base, + range->npins); + if (retval) { + dev_err(&pdev->dev, "failed to add GPIO pin range\n"); + return retval; + } + } + + retval = gpiochip_irqchip_add(&priv->chip, &mrfld_irqchip, irq_base, + 
handle_simple_irq, IRQ_TYPE_NONE); + if (retval) { + dev_err(&pdev->dev, "could not connect irqchip to gpiochip\n"); + return retval; + } + + mrfld_irq_init_hw(priv); + + gpiochip_set_chained_irqchip(&priv->chip, &mrfld_irqchip, pdev->irq, + mrfld_irq_handler); + + return 0; +} + +static const struct pci_device_id mrfld_gpio_ids[] = { + { PCI_VDEVICE(INTEL, 0x1199) }, + { } +}; +MODULE_DEVICE_TABLE(pci, mrfld_gpio_ids); + +static struct pci_driver mrfld_gpio_driver = { + .name = "gpio-merrifield", + .id_table = mrfld_gpio_ids, + .probe = mrfld_gpio_probe, +}; + +module_pci_driver(mrfld_gpio_driver); + +MODULE_AUTHOR("Andy Shevchenko "); +MODULE_DESCRIPTION("Intel Merrifield SoC GPIO driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c index 6c1cb3b8c02c..6ec144baeb11 100644 --- a/drivers/gpio/gpio-mmio.c +++ b/drivers/gpio/gpio-mmio.c @@ -61,6 +61,8 @@ o ` ~~~~\___/~~~~ ` controller in FPGA is ,.` #include #include #include +#include +#include static void bgpio_write8(void __iomem *reg, unsigned long data) { @@ -569,6 +571,41 @@ static void __iomem *bgpio_map(struct platform_device *pdev, return devm_ioremap_resource(&pdev->dev, r); } +#ifdef CONFIG_OF +static const struct of_device_id bgpio_of_match[] = { + { .compatible = "wd,mbl-gpio" }, + { } +}; +MODULE_DEVICE_TABLE(of, bgpio_of_match); + +static struct bgpio_pdata *bgpio_parse_dt(struct platform_device *pdev, + unsigned long *flags) +{ + struct bgpio_pdata *pdata; + + if (!of_match_device(bgpio_of_match, &pdev->dev)) + return NULL; + + pdata = devm_kzalloc(&pdev->dev, sizeof(struct bgpio_pdata), + GFP_KERNEL); + if (!pdata) + return ERR_PTR(-ENOMEM); + + pdata->base = -1; + + if (of_property_read_bool(pdev->dev.of_node, "no-output")) + *flags |= BGPIOF_NO_OUTPUT; + + return pdata; +} +#else +static struct bgpio_pdata *bgpio_parse_dt(struct platform_device *pdev, + unsigned long *flags) +{ + return NULL; +} +#endif /* CONFIG_OF */ + static int bgpio_pdev_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -579,10 +616,19 @@ static int bgpio_pdev_probe(struct platform_device *pdev) void __iomem *dirout; void __iomem *dirin; unsigned long sz; - unsigned long flags = pdev->id_entry->driver_data; + unsigned long flags = 0; int err; struct gpio_chip *gc; - struct bgpio_pdata *pdata = dev_get_platdata(dev); + struct bgpio_pdata *pdata; + + pdata = bgpio_parse_dt(pdev, &flags); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + + if (!pdata) { + pdata = dev_get_platdata(dev); + flags = pdev->id_entry->driver_data; + } r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dat"); if (!r) @@ -646,6 +692,7 @@ MODULE_DEVICE_TABLE(platform, bgpio_id_table); static struct platform_driver bgpio_driver = { .driver = { .name = "basic-mmio-gpio", + .of_match_table = of_match_ptr(bgpio_of_match), }, .id_table = bgpio_id_table, .probe = bgpio_pdev_probe, diff --git a/drivers/gpio/gpio-palmas.c b/drivers/gpio/gpio-palmas.c index e248707ca39e..839474430229 100644 --- a/drivers/gpio/gpio-palmas.c +++ b/drivers/gpio/gpio-palmas.c @@ -208,7 +208,6 @@ static int palmas_gpio_probe(struct platform_device *pdev) static struct platform_driver palmas_gpio_driver = { .driver.name = "palmas-gpio", - .driver.owner = THIS_MODULE, .driver.of_match_table = of_palmas_gpio_match, .probe = palmas_gpio_probe, }; diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 5e3be32ebb8d..02f2a5621bb0 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -44,7 +44,7 @@ 
#define PCA_GPIO_MASK 0x00FF #define PCA_INT 0x0100 -#define PCA_PCAL 0x0200 +#define PCA_PCAL 0x0200 #define PCA953X_TYPE 0x1000 #define PCA957X_TYPE 0x2000 #define PCA_TYPE_MASK 0xF000 @@ -67,6 +67,8 @@ static const struct i2c_device_id pca953x_id[] = { { "pca9575", 16 | PCA957X_TYPE | PCA_INT, }, { "pca9698", 40 | PCA953X_TYPE, }, + { "pcal9555a", 16 | PCA953X_TYPE | PCA_INT | PCA_PCAL, }, + { "max7310", 8 | PCA953X_TYPE, }, { "max7312", 16 | PCA953X_TYPE | PCA_INT, }, { "max7313", 16 | PCA953X_TYPE | PCA_INT, }, @@ -90,7 +92,7 @@ MODULE_DEVICE_TABLE(acpi, pca953x_acpi_ids); #define MAX_BANK 5 #define BANK_SZ 8 -#define NBANK(chip) (chip->gpio_chip.ngpio / BANK_SZ) +#define NBANK(chip) DIV_ROUND_UP(chip->gpio_chip.ngpio, BANK_SZ) struct pca953x_chip { unsigned gpio_start; @@ -135,7 +137,7 @@ static int pca953x_read_single(struct pca953x_chip *chip, int reg, u32 *val, static int pca953x_write_single(struct pca953x_chip *chip, int reg, u32 val, int off) { - int ret = 0; + int ret; int bank_shift = fls((chip->gpio_chip.ngpio - 1) / BANK_SZ); int offset = off / BANK_SZ; @@ -163,10 +165,13 @@ static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val) NBANK(chip), val); } else { switch (chip->chip_type) { - case PCA953X_TYPE: - ret = i2c_smbus_write_word_data(chip->client, - reg << 1, cpu_to_le16(get_unaligned((u16 *)val))); + case PCA953X_TYPE: { + __le16 word = cpu_to_le16(get_unaligned((u16 *)val)); + + ret = i2c_smbus_write_word_data(chip->client, reg << 1, + (__force u16)word); break; + } case PCA957X_TYPE: ret = i2c_smbus_write_byte_data(chip->client, reg << 1, val[0]); @@ -235,7 +240,6 @@ static int pca953x_gpio_direction_input(struct gpio_chip *gc, unsigned off) goto exit; chip->reg_direction[off / BANK_SZ] = reg_val; - ret = 0; exit: mutex_unlock(&chip->i2c_lock); return ret; @@ -286,7 +290,6 @@ static int pca953x_gpio_direction_output(struct gpio_chip *gc, goto exit; chip->reg_direction[off / BANK_SZ] = reg_val; - ret = 0; exit: mutex_unlock(&chip->i2c_lock); return ret; @@ -351,7 +354,6 @@ exit: mutex_unlock(&chip->i2c_lock); } - static void pca953x_gpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { @@ -820,7 +822,7 @@ static int pca953x_remove(struct i2c_client *client) { struct pca953x_platform_data *pdata = dev_get_platdata(&client->dev); struct pca953x_chip *chip = i2c_get_clientdata(client); - int ret = 0; + int ret; if (pdata && pdata->teardown) { ret = pdata->teardown(client, chip->gpio_chip.base, @@ -861,6 +863,7 @@ static const struct of_device_id pca953x_dt_ids[] = { { .compatible = "maxim,max7315", .data = OF_953X( 8, PCA_INT), }, { .compatible = "ti,pca6107", .data = OF_953X( 8, PCA_INT), }, + { .compatible = "ti,pca9536", .data = OF_953X( 4, 0), }, { .compatible = "ti,tca6408", .data = OF_953X( 8, PCA_INT), }, { .compatible = "ti,tca6416", .data = OF_953X(16, PCA_INT), }, { .compatible = "ti,tca6424", .data = OF_953X(24, PCA_INT), }, diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c index 169c09aa33c8..d168410e2338 100644 --- a/drivers/gpio/gpio-pcf857x.c +++ b/drivers/gpio/gpio-pcf857x.c @@ -440,6 +440,14 @@ static int pcf857x_remove(struct i2c_client *client) return status; } +static void pcf857x_shutdown(struct i2c_client *client) +{ + struct pcf857x *gpio = i2c_get_clientdata(client); + + /* Drive all the I/O lines high */ + gpio->write(gpio->client, BIT(gpio->chip.ngpio) - 1); +} + static struct i2c_driver pcf857x_driver = { .driver = { .name = "pcf857x", @@ -447,6 +455,7 @@ static struct 
i2c_driver pcf857x_driver = { }, .probe = pcf857x_probe, .remove = pcf857x_remove, + .shutdown = pcf857x_shutdown, .id_table = pcf857x_id, }; diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index 681c93fb9e70..b96e0b466f74 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -334,6 +334,9 @@ static const struct of_device_id gpio_rcar_of_table[] = { }, { .compatible = "renesas,gpio-r8a7791", .data = &gpio_rcar_info_gen2, + }, { + .compatible = "renesas,gpio-r8a7792", + .data = &gpio_rcar_info_gen2, }, { .compatible = "renesas,gpio-r8a7793", .data = &gpio_rcar_info_gen2, diff --git a/drivers/gpio/gpio-rdc321x.c b/drivers/gpio/gpio-rdc321x.c index ec945b90f54d..cbf0f9e6465b 100644 --- a/drivers/gpio/gpio-rdc321x.c +++ b/drivers/gpio/gpio-rdc321x.c @@ -200,7 +200,6 @@ static int rdc321x_gpio_probe(struct platform_device *pdev) static struct platform_driver rdc321x_gpio_driver = { .driver.name = "rdc321x-gpio", - .driver.owner = THIS_MODULE, .probe = rdc321x_gpio_probe, }; diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c index e85e7539cf5d..eb43ae4835c1 100644 --- a/drivers/gpio/gpio-sch.c +++ b/drivers/gpio/gpio-sch.c @@ -61,9 +61,8 @@ static unsigned sch_gpio_bit(struct sch_gpio *sch, unsigned gpio) return gpio % 8; } -static int sch_gpio_reg_get(struct gpio_chip *gc, unsigned gpio, unsigned reg) +static int sch_gpio_reg_get(struct sch_gpio *sch, unsigned gpio, unsigned reg) { - struct sch_gpio *sch = gpiochip_get_data(gc); unsigned short offset, bit; u8 reg_val; @@ -75,10 +74,9 @@ static int sch_gpio_reg_get(struct gpio_chip *gc, unsigned gpio, unsigned reg) return reg_val; } -static void sch_gpio_reg_set(struct gpio_chip *gc, unsigned gpio, unsigned reg, +static void sch_gpio_reg_set(struct sch_gpio *sch, unsigned gpio, unsigned reg, int val) { - struct sch_gpio *sch = gpiochip_get_data(gc); unsigned short offset, bit; u8 reg_val; @@ -98,14 +96,15 @@ static int sch_gpio_direction_in(struct gpio_chip *gc, unsigned gpio_num) struct sch_gpio *sch = gpiochip_get_data(gc); spin_lock(&sch->lock); - sch_gpio_reg_set(gc, gpio_num, GIO, 1); + sch_gpio_reg_set(sch, gpio_num, GIO, 1); spin_unlock(&sch->lock); return 0; } static int sch_gpio_get(struct gpio_chip *gc, unsigned gpio_num) { - return sch_gpio_reg_get(gc, gpio_num, GLV); + struct sch_gpio *sch = gpiochip_get_data(gc); + return sch_gpio_reg_get(sch, gpio_num, GLV); } static void sch_gpio_set(struct gpio_chip *gc, unsigned gpio_num, int val) @@ -113,7 +112,7 @@ static void sch_gpio_set(struct gpio_chip *gc, unsigned gpio_num, int val) struct sch_gpio *sch = gpiochip_get_data(gc); spin_lock(&sch->lock); - sch_gpio_reg_set(gc, gpio_num, GLV, val); + sch_gpio_reg_set(sch, gpio_num, GLV, val); spin_unlock(&sch->lock); } @@ -123,7 +122,7 @@ static int sch_gpio_direction_out(struct gpio_chip *gc, unsigned gpio_num, struct sch_gpio *sch = gpiochip_get_data(gc); spin_lock(&sch->lock); - sch_gpio_reg_set(gc, gpio_num, GIO, 0); + sch_gpio_reg_set(sch, gpio_num, GIO, 0); spin_unlock(&sch->lock); /* @@ -182,13 +181,13 @@ static int sch_gpio_probe(struct platform_device *pdev) * GPIO7 is configured by the CMC as SLPIOVR * Enable GPIO[9:8] core powered gpios explicitly */ - sch_gpio_reg_set(&sch->chip, 8, GEN, 1); - sch_gpio_reg_set(&sch->chip, 9, GEN, 1); + sch_gpio_reg_set(sch, 8, GEN, 1); + sch_gpio_reg_set(sch, 9, GEN, 1); /* * SUS_GPIO[2:0] enabled by default * Enable SUS_GPIO3 resume powered gpio explicitly */ - sch_gpio_reg_set(&sch->chip, 13, GEN, 1); + sch_gpio_reg_set(sch, 13, GEN, 1); break; case 
PCI_DEVICE_ID_INTEL_ITC_LPC: diff --git a/drivers/gpio/gpio-sch311x.c b/drivers/gpio/gpio-sch311x.c index a03b38ee2e02..b96990c262a1 100644 --- a/drivers/gpio/gpio-sch311x.c +++ b/drivers/gpio/gpio-sch311x.c @@ -296,7 +296,6 @@ static int sch311x_gpio_remove(struct platform_device *pdev) static struct platform_driver sch311x_gpio_driver = { .driver.name = DRV_NAME, - .driver.owner = THIS_MODULE, .probe = sch311x_gpio_probe, .remove = sch311x_gpio_remove, }; diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index 6f7af28b8966..f675132de10e 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -68,6 +68,22 @@ static void stmpe_gpio_set(struct gpio_chip *chip, unsigned offset, int val) stmpe_reg_write(stmpe, reg, mask); } +static int stmpe_gpio_get_direction(struct gpio_chip *chip, + unsigned offset) +{ + struct stmpe_gpio *stmpe_gpio = gpiochip_get_data(chip); + struct stmpe *stmpe = stmpe_gpio->stmpe; + u8 reg = stmpe->regs[STMPE_IDX_GPDR_LSB] - (offset / 8); + u8 mask = 1 << (offset % 8); + int ret; + + ret = stmpe_reg_read(stmpe, reg); + if (ret < 0) + return ret; + + return !(ret & mask); +} + static int stmpe_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int val) { @@ -106,6 +122,7 @@ static int stmpe_gpio_request(struct gpio_chip *chip, unsigned offset) static struct gpio_chip template_chip = { .label = "stmpe", .owner = THIS_MODULE, + .get_direction = stmpe_gpio_get_direction, .direction_input = stmpe_gpio_direction_input, .get = stmpe_gpio_get, .direction_output = stmpe_gpio_direction_output, @@ -416,7 +433,6 @@ static struct platform_driver stmpe_gpio_driver = { .driver = { .suppress_bind_attrs = true, .name = "stmpe-gpio", - .owner = THIS_MODULE, }, .probe = stmpe_gpio_probe, }; diff --git a/drivers/gpio/gpio-syscon.c b/drivers/gpio/gpio-syscon.c index 24b6d643ecdb..537cec7583fc 100644 --- a/drivers/gpio/gpio-syscon.c +++ b/drivers/gpio/gpio-syscon.c @@ -129,7 +129,7 @@ static int syscon_gpio_dir_out(struct gpio_chip *chip, unsigned offset, int val) static const struct syscon_gpio_data clps711x_mctrl_gpio = { /* ARM CLPS711X SYSFLG1 Bits 8-10 */ - .compatible = "cirrus,clps711x-syscon1", + .compatible = "cirrus,ep7209-syscon1", .flags = GPIO_SYSCON_FEAT_IN, .bit_count = 3, .dat_bit_offset = 0x40 * 8 + 8, @@ -168,7 +168,7 @@ static const struct syscon_gpio_data keystone_dsp_gpio = { static const struct of_device_id syscon_gpio_ids[] = { { - .compatible = "cirrus,clps711x-mctrl-gpio", + .compatible = "cirrus,ep7209-mctrl-gpio", .data = &clps711x_mctrl_gpio, }, { diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c index 2e35ed3abbcf..8b3659352e49 100644 --- a/drivers/gpio/gpio-tc3589x.c +++ b/drivers/gpio/gpio-tc3589x.c @@ -343,7 +343,6 @@ static int tc3589x_gpio_probe(struct platform_device *pdev) static struct platform_driver tc3589x_gpio_driver = { .driver.name = "tc3589x-gpio", - .driver.owner = THIS_MODULE, .probe = tc3589x_gpio_probe, }; diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index ec891a27952f..661b0e34e067 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -98,7 +98,6 @@ struct tegra_gpio_info { const struct tegra_gpio_soc_config *soc; struct gpio_chip gc; struct irq_chip ic; - struct lock_class_key lock_class; u32 bank_count; }; @@ -547,6 +546,12 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(tegra_gpio_suspend, tegra_gpio_resume) }; +/* + * This lock class tells lockdep that GPIO irqs are in a different category + * than 
their parents, so it won't report false recursion. + */ +static struct lock_class_key gpio_lock_class; + static int tegra_gpio_probe(struct platform_device *pdev) { const struct tegra_gpio_soc_config *config; @@ -660,7 +665,7 @@ static int tegra_gpio_probe(struct platform_device *pdev) bank = &tgi->bank_info[GPIO_BANK(gpio)]; - irq_set_lockdep_class(irq, &tgi->lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class); irq_set_chip_data(irq, bank); irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq); } diff --git a/drivers/gpio/gpio-tps65218.c b/drivers/gpio/gpio-tps65218.c index 0eaeac8de9de..1c09a19ae10c 100644 --- a/drivers/gpio/gpio-tps65218.c +++ b/drivers/gpio/gpio-tps65218.c @@ -230,6 +230,12 @@ static const struct of_device_id tps65218_dt_match[] = { }; MODULE_DEVICE_TABLE(of, tps65218_dt_match); +static const struct platform_device_id tps65218_gpio_id_table[] = { + { "tps65218-gpio", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(platform, tps65218_gpio_id_table); + static struct platform_driver tps65218_gpio_driver = { .driver = { .name = "tps65218-gpio", @@ -237,6 +243,7 @@ static struct platform_driver tps65218_gpio_driver = { }, .probe = tps65218_gpio_probe, .remove = tps65218_gpio_remove, + .id_table = tps65218_gpio_id_table, }; module_platform_driver(tps65218_gpio_driver); diff --git a/drivers/gpio/gpio-tps6586x.c b/drivers/gpio/gpio-tps6586x.c index 6b15e68a314f..042b9a20781a 100644 --- a/drivers/gpio/gpio-tps6586x.c +++ b/drivers/gpio/gpio-tps6586x.c @@ -131,7 +131,6 @@ static int tps6586x_gpio_probe(struct platform_device *pdev) static struct platform_driver tps6586x_gpio_driver = { .driver.name = "tps6586x-gpio", - .driver.owner = THIS_MODULE, .probe = tps6586x_gpio_probe, }; diff --git a/drivers/gpio/gpio-tps65910.c b/drivers/gpio/gpio-tps65910.c index 0ae6a5a54ea8..e63d7dabf78b 100644 --- a/drivers/gpio/gpio-tps65910.c +++ b/drivers/gpio/gpio-tps65910.c @@ -184,7 +184,6 @@ skip_init: static struct platform_driver tps65910_gpio_driver = { .driver.name = "tps65910-gpio", - .driver.owner = THIS_MODULE, .probe = tps65910_gpio_probe, }; diff --git a/drivers/gpio/gpio-viperboard.c b/drivers/gpio/gpio-viperboard.c index dec47aafd5cd..e6d1328dddfa 100644 --- a/drivers/gpio/gpio-viperboard.c +++ b/drivers/gpio/gpio-viperboard.c @@ -440,7 +440,6 @@ static int vprbrd_gpio_probe(struct platform_device *pdev) static struct platform_driver vprbrd_gpio_driver = { .driver.name = "viperboard-gpio", - .driver.owner = THIS_MODULE, .probe = vprbrd_gpio_probe, }; diff --git a/drivers/gpio/gpio-wm831x.c b/drivers/gpio/gpio-wm831x.c index 41ec7834059a..21f97bcd0062 100644 --- a/drivers/gpio/gpio-wm831x.c +++ b/drivers/gpio/gpio-wm831x.c @@ -296,7 +296,6 @@ static int wm831x_gpio_probe(struct platform_device *pdev) static struct platform_driver wm831x_gpio_driver = { .driver.name = "wm831x-gpio", - .driver.owner = THIS_MODULE, .probe = wm831x_gpio_probe, }; diff --git a/drivers/gpio/gpio-wm8350.c b/drivers/gpio/gpio-wm8350.c index 07d45a3b205a..e9765707d5c1 100644 --- a/drivers/gpio/gpio-wm8350.c +++ b/drivers/gpio/gpio-wm8350.c @@ -139,7 +139,6 @@ static int wm8350_gpio_probe(struct platform_device *pdev) static struct platform_driver wm8350_gpio_driver = { .driver.name = "wm8350-gpio", - .driver.owner = THIS_MODULE, .probe = wm8350_gpio_probe, }; diff --git a/drivers/gpio/gpio-wm8994.c b/drivers/gpio/gpio-wm8994.c index 744af388c949..2457aac8592e 100644 --- a/drivers/gpio/gpio-wm8994.c +++ b/drivers/gpio/gpio-wm8994.c @@ -299,7 +299,6 @@ static int wm8994_gpio_probe(struct 
platform_device *pdev) static struct platform_driver wm8994_gpio_driver = { .driver.name = "wm8994-gpio", - .driver.owner = THIS_MODULE, .probe = wm8994_gpio_probe, }; diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c index d0fbb7f99523..14b2a62338ea 100644 --- a/drivers/gpio/gpio-xilinx.c +++ b/drivers/gpio/gpio-xilinx.c @@ -132,6 +132,53 @@ static void xgpio_set(struct gpio_chip *gc, unsigned int gpio, int val) spin_unlock_irqrestore(&chip->gpio_lock[index], flags); } +/** + * xgpio_set_multiple - Write the specified signals of the GPIO device. + * @gc: Pointer to gpio_chip device structure. + * @mask: Mask of the GPIOs to modify. + * @bits: Value to be written on each GPIO + * + * This function writes the specified values into the specified signals of the + * GPIO device. + */ +static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, + unsigned long *bits) +{ + unsigned long flags; + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct xgpio_instance *chip = gpiochip_get_data(gc); + int index = xgpio_index(chip, 0); + int offset, i; + + spin_lock_irqsave(&chip->gpio_lock[index], flags); + + /* Write to GPIO signals */ + for (i = 0; i < gc->ngpio; i++) { + if (*mask == 0) + break; + if (index != xgpio_index(chip, i)) { + xgpio_writereg(mm_gc->regs + XGPIO_DATA_OFFSET + + xgpio_regoffset(chip, i), + chip->gpio_state[index]); + spin_unlock_irqrestore(&chip->gpio_lock[index], flags); + index = xgpio_index(chip, i); + spin_lock_irqsave(&chip->gpio_lock[index], flags); + } + if (__test_and_clear_bit(i, mask)) { + offset = xgpio_offset(chip, i); + if (test_bit(i, bits)) + chip->gpio_state[index] |= BIT(offset); + else + chip->gpio_state[index] &= ~BIT(offset); + } + } + + xgpio_writereg(mm_gc->regs + XGPIO_DATA_OFFSET + + xgpio_regoffset(chip, i), chip->gpio_state[index]); + + spin_unlock_irqrestore(&chip->gpio_lock[index], flags); +} + /** * xgpio_dir_in - Set the direction of the specified GPIO signal as input. * @gc: Pointer to gpio_chip device structure.
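For context on the xgpio_set_multiple() implementation above: gpiolib hands the driver two bitmaps, where mask selects which lines to touch and bits carries their new values, and the driver is expected to consume mask bits as it handles them (hence __test_and_clear_bit()). A minimal sketch of the calling convention for an 8-line chip, assuming a gpio_chip pointer gc is already at hand; this is illustrative only, not how gpiolib itself builds the arrays:

    /* Sketch of the set_multiple() calling convention. */
    DECLARE_BITMAP(mask, 8);
    DECLARE_BITMAP(bits, 8);

    bitmap_zero(mask, 8);
    bitmap_zero(bits, 8);
    __set_bit(2, mask);     /* touch line 2 ... */
    __set_bit(2, bits);     /* ... and drive it high */
    __set_bit(5, mask);     /* touch line 5; its 'bits' bit stays 0 => low */

    gc->set_multiple(gc, mask, bits);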
@@ -306,6 +353,7 @@ static int xgpio_probe(struct platform_device *pdev) chip->mmchip.gc.direction_output = xgpio_dir_out; chip->mmchip.gc.get = xgpio_get; chip->mmchip.gc.set = xgpio_set; + chip->mmchip.gc.set_multiple = xgpio_set_multiple; chip->mmchip.save_regs = xgpio_save_regs; diff --git a/drivers/gpio/gpio-xlp.c b/drivers/gpio/gpio-xlp.c index 1a33a19d95b9..4620d050e5a8 100644 --- a/drivers/gpio/gpio-xlp.c +++ b/drivers/gpio/gpio-xlp.c @@ -19,6 +19,7 @@ #include #include #include +#include /* * XLP GPIO has multiple 32 bit registers for each feature where each register @@ -299,7 +300,6 @@ static int xlp_gpio_probe(struct platform_device *pdev) struct gpio_chip *gc; struct resource *iores; struct xlp_gpio_priv *priv; - const struct of_device_id *of_id; void __iomem *gpio_base; int irq_base, irq, err; int ngpio; @@ -321,13 +321,26 @@ static int xlp_gpio_probe(struct platform_device *pdev) if (irq < 0) return irq; - of_id = of_match_device(xlp_gpio_of_ids, &pdev->dev); - if (!of_id) { - dev_err(&pdev->dev, "Failed to get soc type!\n"); - return -ENODEV; - } + if (pdev->dev.of_node) { + const struct of_device_id *of_id; - soc_type = (uintptr_t) of_id->data; + of_id = of_match_device(xlp_gpio_of_ids, &pdev->dev); + if (!of_id) { + dev_err(&pdev->dev, "Unable to match OF ID\n"); + return -ENODEV; + } + soc_type = (uintptr_t) of_id->data; + } else { + const struct acpi_device_id *acpi_id; + + acpi_id = acpi_match_device(pdev->dev.driver->acpi_match_table, + &pdev->dev); + if (!acpi_id || !acpi_id->driver_data) { + dev_err(&pdev->dev, "Unable to match ACPI ID\n"); + return -ENODEV; + } + soc_type = (uintptr_t) acpi_id->driver_data; + } switch (soc_type) { case XLP_GPIO_VARIANT_XLP832: @@ -388,14 +401,16 @@ static int xlp_gpio_probe(struct platform_device *pdev) gc->get = xlp_gpio_get; spin_lock_init(&priv->lock); - /* XLP has fixed IRQ range for GPIO interrupts */ - if (soc_type == GPIO_VARIANT_VULCAN) - irq_base = irq_alloc_descs(-1, 0, gc->ngpio, 0); - else + + /* XLP(MIPS) has fixed range for GPIO IRQs, Vulcan(ARM64) does not */ + if (soc_type != GPIO_VARIANT_VULCAN) { irq_base = irq_alloc_descs(-1, XLP_GPIO_IRQ_BASE, gc->ngpio, 0); - if (irq_base < 0) { - dev_err(&pdev->dev, "Failed to allocate IRQ numbers\n"); - return irq_base; + if (irq_base < 0) { + dev_err(&pdev->dev, "Failed to allocate IRQ numbers\n"); + return irq_base; + } + } else { + irq_base = 0; } err = gpiochip_add_data(gc, priv); @@ -423,10 +438,19 @@ out_free_desc: return err; } +#ifdef CONFIG_ACPI +static const struct acpi_device_id xlp_gpio_acpi_match[] = { + { "BRCM9006", GPIO_VARIANT_VULCAN }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, xlp_gpio_acpi_match); +#endif + static struct platform_driver xlp_gpio_driver = { .driver = { .name = "xlp-gpio", .of_match_table = xlp_gpio_of_ids, + .acpi_match_table = ACPI_PTR(xlp_gpio_acpi_match), }, .probe = xlp_gpio_probe, }; diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 2dc52585e3f2..af514618d7fb 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -836,6 +836,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip) } acpi_gpiochip_request_regions(acpi_gpio); + acpi_walk_dep_device_list(handle); } void acpi_gpiochip_remove(struct gpio_chip *chip) diff --git a/drivers/gpio/gpiolib-legacy.c b/drivers/gpio/gpiolib-legacy.c index 3a5c7011ad3b..8b830996fe02 100644 --- a/drivers/gpio/gpiolib-legacy.c +++ b/drivers/gpio/gpiolib-legacy.c @@ -28,6 +28,10 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label) if (!desc 
&& gpio_is_valid(gpio)) return -EPROBE_DEFER; + err = gpiod_request(desc, label); + if (err) + return err; + if (flags & GPIOF_OPEN_DRAIN) set_bit(FLAG_OPEN_DRAIN, &desc->flags); @@ -37,10 +41,6 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label) if (flags & GPIOF_ACTIVE_LOW) set_bit(FLAG_ACTIVE_LOW, &desc->flags); - err = gpiod_request(desc, label); - if (err) - return err; - if (flags & GPIOF_DIR_IN) err = gpiod_direction_input(desc); else diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 4aabddb38b59..75e7b3919ea7 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -27,38 +27,30 @@ #include "gpiolib.h" -/* Private data structure for of_gpiochip_find_and_xlate */ -struct gg_data { - enum of_gpio_flags *flags; - struct of_phandle_args gpiospec; +static int of_gpiochip_match_node(struct gpio_chip *chip, void *data) +{ + return chip->gpiodev->dev.of_node == data; +} - struct gpio_desc *out_gpio; -}; +static struct gpio_chip *of_find_gpiochip_by_node(struct device_node *np) +{ + return gpiochip_find(np, of_gpiochip_match_node); +} -/* Private function for resolving node pointer to gpio_chip */ -static int of_gpiochip_find_and_xlate(struct gpio_chip *gc, void *data) +static struct gpio_desc *of_xlate_and_get_gpiod_flags(struct gpio_chip *chip, + struct of_phandle_args *gpiospec, + enum of_gpio_flags *flags) { - struct gg_data *gg_data = data; int ret; - if ((gc->of_node != gg_data->gpiospec.np) || - (gc->of_gpio_n_cells != gg_data->gpiospec.args_count) || - (!gc->of_xlate)) - return false; - - ret = gc->of_xlate(gc, &gg_data->gpiospec, gg_data->flags); - if (ret < 0) { - /* We've found a gpio chip, but the translation failed. - * Store translation error in out_gpio. - * Return false to keep looking, as more than one gpio chip - * could be registered per of-node. - */ - gg_data->out_gpio = ERR_PTR(ret); - return false; - } - - gg_data->out_gpio = gpiochip_get_desc(gc, ret); - return true; + if (chip->of_gpio_n_cells != gpiospec->args_count) + return ERR_PTR(-EINVAL); + + ret = chip->of_xlate(chip, gpiospec, flags); + if (ret < 0) + return ERR_PTR(ret); + + return gpiochip_get_desc(chip, ret); } /** @@ -75,34 +67,37 @@ static int of_gpiochip_find_and_xlate(struct gpio_chip *gc, void *data) struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np, const char *propname, int index, enum of_gpio_flags *flags) { - /* Return -EPROBE_DEFER to support probe() functions to be called - * later when the GPIO actually becomes available - */ - struct gg_data gg_data = { - .flags = flags, - .out_gpio = ERR_PTR(-EPROBE_DEFER) - }; + struct of_phandle_args gpiospec; + struct gpio_chip *chip; + struct gpio_desc *desc; int ret; - /* .of_xlate might decide to not fill in the flags, so clear it. 
*/ - if (flags) - *flags = 0; - ret = of_parse_phandle_with_args(np, propname, "#gpio-cells", index, - &gg_data.gpiospec); + &gpiospec); if (ret) { pr_debug("%s: can't parse '%s' property of node '%s[%d]'\n", __func__, propname, np->full_name, index); return ERR_PTR(ret); } - gpiochip_find(&gg_data, of_gpiochip_find_and_xlate); + chip = of_find_gpiochip_by_node(gpiospec.np); + if (!chip) { + desc = ERR_PTR(-EPROBE_DEFER); + goto out; + } + + desc = of_xlate_and_get_gpiod_flags(chip, &gpiospec, flags); + if (IS_ERR(desc)) + goto out; - of_node_put(gg_data.gpiospec.np); pr_debug("%s: parsed '%s' property of node '%s[%d]' - status (%d)\n", __func__, propname, np->full_name, index, - PTR_ERR_OR_ZERO(gg_data.out_gpio)); - return gg_data.out_gpio; + PTR_ERR_OR_ZERO(desc)); + +out: + of_node_put(gpiospec.np); + + return desc; } int of_get_named_gpio_flags(struct device_node *np, const char *list_name, @@ -122,6 +117,7 @@ EXPORT_SYMBOL(of_get_named_gpio_flags); /** * of_parse_own_gpio() - Get a GPIO hog descriptor, names and flags for GPIO API * @np: device node to get GPIO from + * @chip: GPIO chip whose hog is parsed * @name: GPIO line name * @lflags: gpio_lookup_flags - returned from of_find_gpio() or * of_parse_own_gpio() @@ -131,19 +127,19 @@ EXPORT_SYMBOL(of_get_named_gpio_flags); * value on the error condition. */ static struct gpio_desc *of_parse_own_gpio(struct device_node *np, + struct gpio_chip *chip, const char **name, enum gpio_lookup_flags *lflags, enum gpiod_flags *dflags) { struct device_node *chip_np; enum of_gpio_flags xlate_flags; - struct gg_data gg_data = { - .flags = &xlate_flags, - }; + struct of_phandle_args gpiospec; + struct gpio_desc *desc; u32 tmp; - int i, ret; + int ret; - chip_np = np->parent; + chip_np = chip->of_node; if (!chip_np) return ERR_PTR(-EINVAL); @@ -155,25 +151,16 @@ static struct gpio_desc *of_parse_own_gpio(struct device_node *np, if (ret) return ERR_PTR(ret); - if (tmp > MAX_PHANDLE_ARGS) - return ERR_PTR(-EINVAL); + gpiospec.np = chip_np; + gpiospec.args_count = tmp; - gg_data.gpiospec.args_count = tmp; - gg_data.gpiospec.np = chip_np; - for (i = 0; i < tmp; i++) { - ret = of_property_read_u32_index(np, "gpios", i, - &gg_data.gpiospec.args[i]); - if (ret) - return ERR_PTR(ret); - } + ret = of_property_read_u32_array(np, "gpios", gpiospec.args, tmp); + if (ret) + return ERR_PTR(ret); - gpiochip_find(&gg_data, of_gpiochip_find_and_xlate); - if (!gg_data.out_gpio) { - if (np->parent == np) - return ERR_PTR(-ENXIO); - else - return ERR_PTR(-EINVAL); - } + desc = of_xlate_and_get_gpiod_flags(chip, &gpiospec, &xlate_flags); + if (IS_ERR(desc)) + return desc; if (xlate_flags & OF_GPIO_ACTIVE_LOW) *lflags |= GPIO_ACTIVE_LOW; @@ -186,14 +173,14 @@ static struct gpio_desc *of_parse_own_gpio(struct device_node *np, *dflags |= GPIOD_OUT_HIGH; else { pr_warn("GPIO line %d (%s): no hogging state specified, bailing out\n", - desc_to_gpio(gg_data.out_gpio), np->name); + desc_to_gpio(desc), np->name); return ERR_PTR(-EINVAL); } if (name && of_property_read_string(np, "line-name", name)) *name = np->name; - return gg_data.out_gpio; + return desc; } /** @@ -262,7 +249,7 @@ static int of_gpiochip_scan_gpios(struct gpio_chip *chip) if (!of_property_read_bool(np, "gpio-hog")) continue; - desc = of_parse_own_gpio(np, &name, &lflags, &dflags); + desc = of_parse_own_gpio(np, chip, &name, &lflags, &dflags); if (IS_ERR(desc)) continue; @@ -410,6 +397,7 @@ static int of_gpiochip_add_pin_range(struct gpio_chip *chip) break; pctldev = of_pinctrl_get(pinspec.np); + 
of_node_put(pinspec.np); if (!pctldev) return -EPROBE_DEFER; @@ -487,6 +475,9 @@ int of_gpiochip_add(struct gpio_chip *chip) chip->of_xlate = of_gpio_simple_xlate; } + if (chip->of_gpio_n_cells > MAX_PHANDLE_ARGS) + return -EINVAL; + status = of_gpiochip_add_pin_range(chip); if (status) return status; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 58d822d7e8da..53ff25ac66d8 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -16,11 +16,14 @@ #include #include #include -#include #include #include #include #include +#include +#include +#include +#include #include #include "gpiolib.h" @@ -310,6 +313,497 @@ static int gpiochip_set_desc_names(struct gpio_chip *gc) return 0; } +/* + * GPIO line handle management + */ + +/** + * struct linehandle_state - contains the state of a userspace handle + * @gdev: the GPIO device the handle pertains to + * @label: consumer label used to tag descriptors + * @descs: the GPIO descriptors held by this handle + * @numdescs: the number of descriptors held in the descs array + */ +struct linehandle_state { + struct gpio_device *gdev; + const char *label; + struct gpio_desc *descs[GPIOHANDLES_MAX]; + u32 numdescs; +}; + +static long linehandle_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + struct linehandle_state *lh = filep->private_data; + void __user *ip = (void __user *)arg; + struct gpiohandle_data ghd; + int i; + + if (cmd == GPIOHANDLE_GET_LINE_VALUES_IOCTL) { + int val; + + /* TODO: check if descriptors are really input */ + for (i = 0; i < lh->numdescs; i++) { + val = gpiod_get_value_cansleep(lh->descs[i]); + if (val < 0) + return val; + ghd.values[i] = val; + } + + if (copy_to_user(ip, &ghd, sizeof(ghd))) + return -EFAULT; + + return 0; + } else if (cmd == GPIOHANDLE_SET_LINE_VALUES_IOCTL) { + int vals[GPIOHANDLES_MAX]; + + /* TODO: check if descriptors are really output */ + if (copy_from_user(&ghd, ip, sizeof(ghd))) + return -EFAULT; + + /* Clamp all values to [0,1] */ + for (i = 0; i < lh->numdescs; i++) + vals[i] = !!ghd.values[i]; + + /* Reuse the array setting function */ + gpiod_set_array_value_complex(false, + true, + lh->numdescs, + lh->descs, + vals); + return 0; + } + return -EINVAL; +} + +#ifdef CONFIG_COMPAT +static long linehandle_ioctl_compat(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + return linehandle_ioctl(filep, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + +static int linehandle_release(struct inode *inode, struct file *filep) +{ + struct linehandle_state *lh = filep->private_data; + struct gpio_device *gdev = lh->gdev; + int i; + + for (i = 0; i < lh->numdescs; i++) + gpiod_free(lh->descs[i]); + kfree(lh->label); + kfree(lh); + put_device(&gdev->dev); + return 0; +} + +static const struct file_operations linehandle_fileops = { + .release = linehandle_release, + .owner = THIS_MODULE, + .llseek = noop_llseek, + .unlocked_ioctl = linehandle_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = linehandle_ioctl_compat, +#endif +}; + +static int linehandle_create(struct gpio_device *gdev, void __user *ip) +{ + struct gpiohandle_request handlereq; + struct linehandle_state *lh; + int fd, i, ret; + + if (copy_from_user(&handlereq, ip, sizeof(handlereq))) + return -EFAULT; + if ((handlereq.lines == 0) || (handlereq.lines > GPIOHANDLES_MAX)) + return -EINVAL; + + lh = kzalloc(sizeof(*lh), GFP_KERNEL); + if (!lh) + return -ENOMEM; + lh->gdev = gdev; + get_device(&gdev->dev); + + /* Make sure this is terminated */ + 
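The linehandle_create() path being added here is the kernel half of the new GPIO character-device handle ABI. A short userspace sketch of the other half, requesting one line as input via GPIO_GET_LINEHANDLE_IOCTL and reading it back through the returned handle fd (the chip path and line offset are made up for illustration):

    /* Userspace sketch of the line-handle ABI from linux/gpio.h (v4.8). */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/gpio.h>

    int main(void)
    {
            struct gpiohandle_request req;
            struct gpiohandle_data data;
            int fd = open("/dev/gpiochip0", O_RDWR);    /* path is illustrative */

            if (fd < 0)
                    return 1;

            memset(&req, 0, sizeof(req));
            req.lineoffsets[0] = 4;                     /* made-up line offset */
            req.lines = 1;
            req.flags = GPIOHANDLE_REQUEST_INPUT;
            strcpy(req.consumer_label, "demo");

            if (ioctl(fd, GPIO_GET_LINEHANDLE_IOCTL, &req) < 0)
                    return 1;

            /* Values are read on the handle fd, not on the chip fd. */
            if (ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data) < 0)
                    return 1;

            printf("line 4 = %d\n", data.values[0]);
            close(req.fd);
            close(fd);
            return 0;
    }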
handlereq.consumer_label[sizeof(handlereq.consumer_label)-1] = '\0'; + if (strlen(handlereq.consumer_label)) { + lh->label = kstrdup(handlereq.consumer_label, + GFP_KERNEL); + if (!lh->label) { + ret = -ENOMEM; + goto out_free_lh; + } + } + + /* Request each GPIO */ + for (i = 0; i < handlereq.lines; i++) { + u32 offset = handlereq.lineoffsets[i]; + u32 lflags = handlereq.flags; + struct gpio_desc *desc; + + desc = &gdev->descs[offset]; + ret = gpiod_request(desc, lh->label); + if (ret) + goto out_free_descs; + lh->descs[i] = desc; + + if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW) + set_bit(FLAG_ACTIVE_LOW, &desc->flags); + if (lflags & GPIOHANDLE_REQUEST_OPEN_DRAIN) + set_bit(FLAG_OPEN_DRAIN, &desc->flags); + if (lflags & GPIOHANDLE_REQUEST_OPEN_SOURCE) + set_bit(FLAG_OPEN_SOURCE, &desc->flags); + + /* + * Lines have to be requested explicitly for input + * or output, else the line will be treated "as is". + */ + if (lflags & GPIOHANDLE_REQUEST_OUTPUT) { + int val = !!handlereq.default_values[i]; + + ret = gpiod_direction_output(desc, val); + if (ret) + goto out_free_descs; + } else if (lflags & GPIOHANDLE_REQUEST_INPUT) { + ret = gpiod_direction_input(desc); + if (ret) + goto out_free_descs; + } + dev_dbg(&gdev->dev, "registered chardev handle for line %d\n", + offset); + } + /* Let i point at the last handle */ + i--; + lh->numdescs = handlereq.lines; + + fd = anon_inode_getfd("gpio-linehandle", + &linehandle_fileops, + lh, + O_RDONLY | O_CLOEXEC); + if (fd < 0) { + ret = fd; + goto out_free_descs; + } + + handlereq.fd = fd; + if (copy_to_user(ip, &handlereq, sizeof(handlereq))) { + ret = -EFAULT; + goto out_free_descs; + } + + dev_dbg(&gdev->dev, "registered chardev handle for %d lines\n", + lh->numdescs); + + return 0; + +out_free_descs: + for (; i >= 0; i--) + gpiod_free(lh->descs[i]); + kfree(lh->label); +out_free_lh: + kfree(lh); + put_device(&gdev->dev); + return ret; +} + +/* + * GPIO line event management + */ + +/** + * struct lineevent_state - contains the state of a userspace event + * @gdev: the GPIO device the event pertains to + * @label: consumer label used to tag descriptors + * @desc: the GPIO descriptor held by this event + * @eflags: the event flags this line was requested with + * @irq: the interrupt that triggers in response to events on this GPIO + * @wait: wait queue that handles blocking reads of events + * @events: KFIFO for the GPIO events + * @read_lock: mutex lock to protect reads from colliding with adding + * new events to the FIFO + */ +struct lineevent_state { + struct gpio_device *gdev; + const char *label; + struct gpio_desc *desc; + u32 eflags; + int irq; + wait_queue_head_t wait; + DECLARE_KFIFO(events, struct gpioevent_data, 16); + struct mutex read_lock; +}; + +static unsigned int lineevent_poll(struct file *filep, + struct poll_table_struct *wait) +{ + struct lineevent_state *le = filep->private_data; + unsigned int events = 0; + + poll_wait(filep, &le->wait, wait); + + if (!kfifo_is_empty(&le->events)) + events = POLLIN | POLLRDNORM; + + return events; +} + + +static ssize_t lineevent_read(struct file *filep, + char __user *buf, + size_t count, + loff_t *f_ps) +{ + struct lineevent_state *le = filep->private_data; + unsigned int copied; + int ret; + + if (count < sizeof(struct gpioevent_data)) + return -EINVAL; + + do { + if (kfifo_is_empty(&le->events)) { + if (filep->f_flags & O_NONBLOCK) + return -EAGAIN; + + ret = wait_event_interruptible(le->wait, + !kfifo_is_empty(&le->events)); + if (ret) + return ret; + } + + if
(mutex_lock_interruptible(&le->read_lock)) + return -ERESTARTSYS; + ret = kfifo_to_user(&le->events, buf, count, &copied); + mutex_unlock(&le->read_lock); + + if (ret) + return ret; + + /* + * If we couldn't read anything from the fifo (a different + * thread might have been faster) we either return -EAGAIN if + * the file descriptor is non-blocking, otherwise we go back to + * sleep and wait for more data to arrive. + */ + if (copied == 0 && (filep->f_flags & O_NONBLOCK)) + return -EAGAIN; + + } while (copied == 0); + + return copied; +} + +static int lineevent_release(struct inode *inode, struct file *filep) +{ + struct lineevent_state *le = filep->private_data; + struct gpio_device *gdev = le->gdev; + + free_irq(le->irq, le); + gpiod_free(le->desc); + kfree(le->label); + kfree(le); + put_device(&gdev->dev); + return 0; +} + +static long lineevent_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + struct lineevent_state *le = filep->private_data; + void __user *ip = (void __user *)arg; + struct gpiohandle_data ghd; + + /* + * We can get the value for an event line but not set it, + * because it is input by definition. + */ + if (cmd == GPIOHANDLE_GET_LINE_VALUES_IOCTL) { + int val; + + val = gpiod_get_value_cansleep(le->desc); + if (val < 0) + return val; + ghd.values[0] = val; + + if (copy_to_user(ip, &ghd, sizeof(ghd))) + return -EFAULT; + + return 0; + } + return -EINVAL; +} + +#ifdef CONFIG_COMPAT +static long lineevent_ioctl_compat(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + return lineevent_ioctl(filep, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + +static const struct file_operations lineevent_fileops = { + .release = lineevent_release, + .read = lineevent_read, + .poll = lineevent_poll, + .owner = THIS_MODULE, + .llseek = noop_llseek, + .unlocked_ioctl = lineevent_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = lineevent_ioctl_compat, +#endif +}; + +static irqreturn_t lineevent_irq_thread(int irq, void *p) +{ + struct lineevent_state *le = p; + struct gpioevent_data ge; + int ret; + + ge.timestamp = ktime_get_real_ns(); + + if (le->eflags & GPIOEVENT_REQUEST_BOTH_EDGES) { + int level = gpiod_get_value_cansleep(le->desc); + + if (level) + /* Emit low-to-high event */ + ge.id = GPIOEVENT_EVENT_RISING_EDGE; + else + /* Emit high-to-low event */ + ge.id = GPIOEVENT_EVENT_FALLING_EDGE; + } else if (le->eflags & GPIOEVENT_REQUEST_RISING_EDGE) { + /* Emit low-to-high event */ + ge.id = GPIOEVENT_EVENT_RISING_EDGE; + } else if (le->eflags & GPIOEVENT_REQUEST_FALLING_EDGE) { + /* Emit high-to-low event */ + ge.id = GPIOEVENT_EVENT_FALLING_EDGE; + } else { + return IRQ_NONE; + } + + ret = kfifo_put(&le->events, ge); + if (ret != 0) + wake_up_poll(&le->wait, POLLIN); + + return IRQ_HANDLED; +} + +static int lineevent_create(struct gpio_device *gdev, void __user *ip) +{ + struct gpioevent_request eventreq; + struct lineevent_state *le; + struct gpio_desc *desc; + u32 offset; + u32 lflags; + u32 eflags; + int fd; + int ret; + int irqflags = 0; + + if (copy_from_user(&eventreq, ip, sizeof(eventreq))) + return -EFAULT; + + le = kzalloc(sizeof(*le), GFP_KERNEL); + if (!le) + return -ENOMEM; + le->gdev = gdev; + get_device(&gdev->dev); + + /* Make sure this is terminated */ + eventreq.consumer_label[sizeof(eventreq.consumer_label)-1] = '\0'; + if (strlen(eventreq.consumer_label)) { + le->label = kstrdup(eventreq.consumer_label, + GFP_KERNEL); + if (!le->label) { + ret = -ENOMEM; + goto out_free_le; + } + } + + offset = eventreq.lineoffset; + lflags = 
eventreq.handleflags; + eflags = eventreq.eventflags; + + /* This is just wrong: we don't look for events on output lines */ + if (lflags & GPIOHANDLE_REQUEST_OUTPUT) { + ret = -EINVAL; + goto out_free_label; + } + + desc = &gdev->descs[offset]; + ret = gpiod_request(desc, le->label); + if (ret) + goto out_free_desc; + le->desc = desc; + le->eflags = eflags; + + if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW) + set_bit(FLAG_ACTIVE_LOW, &desc->flags); + if (lflags & GPIOHANDLE_REQUEST_OPEN_DRAIN) + set_bit(FLAG_OPEN_DRAIN, &desc->flags); + if (lflags & GPIOHANDLE_REQUEST_OPEN_SOURCE) + set_bit(FLAG_OPEN_SOURCE, &desc->flags); + + ret = gpiod_direction_input(desc); + if (ret) + goto out_free_desc; + + le->irq = gpiod_to_irq(desc); + if (le->irq <= 0) { + ret = -ENODEV; + goto out_free_desc; + } + + if (eflags & GPIOEVENT_REQUEST_RISING_EDGE) + irqflags |= IRQF_TRIGGER_RISING; + if (eflags & GPIOEVENT_REQUEST_FALLING_EDGE) + irqflags |= IRQF_TRIGGER_FALLING; + irqflags |= IRQF_ONESHOT; + irqflags |= IRQF_SHARED; + + INIT_KFIFO(le->events); + init_waitqueue_head(&le->wait); + mutex_init(&le->read_lock); + + /* Request a thread to read the events */ + ret = request_threaded_irq(le->irq, + NULL, + lineevent_irq_thread, + irqflags, + le->label, + le); + if (ret) + goto out_free_desc; + + fd = anon_inode_getfd("gpio-event", + &lineevent_fileops, + le, + O_RDONLY | O_CLOEXEC); + if (fd < 0) { + ret = fd; + goto out_free_irq; + } + + eventreq.fd = fd; + if (copy_to_user(ip, &eventreq, sizeof(eventreq))) { + ret = -EFAULT; + goto out_free_irq; + } + + return 0; + +out_free_irq: + free_irq(le->irq, le); +out_free_desc: + gpiod_free(le->desc); +out_free_label: + kfree(le->label); +out_free_le: + kfree(le); + put_device(&gdev->dev); + return ret; +} + /** * gpio_ioctl() - ioctl handler for the GPIO chardev */ @@ -385,6 +879,10 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) return -EFAULT; return 0; + } else if (cmd == GPIO_GET_LINEHANDLE_IOCTL) { + return linehandle_create(gdev, ip); + } else if (cmd == GPIO_GET_LINEEVENT_IOCTL) { + return lineevent_create(gdev, ip); } return -EINVAL; } @@ -548,13 +1046,14 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) if (chip->parent) { gdev->dev.parent = chip->parent; gdev->dev.of_node = chip->parent->of_node; - } else { + } + #ifdef CONFIG_OF_GPIO /* If the gpiochip has an assigned OF node this takes precedence */ - if (chip->of_node) - gdev->dev.of_node = chip->of_node; + if (chip->of_node) + gdev->dev.of_node = chip->of_node; #endif - } + gdev->id = ida_simple_get(&gpio_ida, 0, 0, GFP_KERNEL); if (gdev->id < 0) { status = gdev->id; @@ -1352,14 +1851,6 @@ static int __gpiod_request(struct gpio_desc *desc, const char *label) spin_lock_irqsave(&gpio_lock, flags); } done: - if (status < 0) { - /* Clear flags that might have been set by the caller before - * requesting the GPIO. 
- */ - clear_bit(FLAG_ACTIVE_LOW, &desc->flags); - clear_bit(FLAG_OPEN_DRAIN, &desc->flags); - clear_bit(FLAG_OPEN_SOURCE, &desc->flags); - } spin_unlock_irqrestore(&gpio_lock, flags); return status; } @@ -1373,8 +1864,12 @@ done: #define VALIDATE_DESC(desc) do { \ if (!desc) \ return 0; \ + if (IS_ERR(desc)) { \ + pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \ + return PTR_ERR(desc); \ + } \ if (!desc->gdev) { \ - pr_warn("%s: invalid GPIO\n", __func__); \ + pr_warn("%s: invalid GPIO (no device)\n", __func__); \ return -EINVAL; \ } \ if ( !desc->gdev->chip ) { \ @@ -1386,8 +1881,12 @@ done: #define VALIDATE_DESC_VOID(desc) do { \ if (!desc) \ return; \ + if (IS_ERR(desc)) { \ + pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \ + return; \ + } \ if (!desc->gdev) { \ - pr_warn("%s: invalid GPIO\n", __func__); \ + pr_warn("%s: invalid GPIO (no device)\n", __func__); \ return; \ } \ if (!desc->gdev->chip) { \ @@ -2056,7 +2555,14 @@ int gpiod_to_irq(const struct gpio_desc *desc) struct gpio_chip *chip; int offset; - VALIDATE_DESC(desc); + /* + * Cannot VALIDATE_DESC() here as gpiod_to_irq() consumer semantics + * requires this function to not return zero on an invalid descriptor + * but rather a negative error number. + */ + if (!desc || IS_ERR(desc) || !desc->gdev || !desc->gdev->chip) + return -EINVAL; + chip = desc->gdev->chip; offset = gpio_chip_hwgpio(desc); if (chip->to_irq) { @@ -2326,7 +2832,7 @@ static struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx, &of_flags); - if (!IS_ERR(desc) || (PTR_ERR(desc) == -EPROBE_DEFER)) + if (!IS_ERR(desc) || (PTR_ERR(desc) != -ENOENT)) break; } @@ -2572,28 +3078,13 @@ struct gpio_desc *__must_check gpiod_get_optional(struct device *dev, } EXPORT_SYMBOL_GPL(gpiod_get_optional); -/** - * gpiod_parse_flags - helper function to parse GPIO lookup flags - * @desc: gpio to be setup - * @lflags: gpio_lookup_flags - returned from of_find_gpio() or - * of_get_gpio_hog() - * - * Set the GPIO descriptor flags based on the given GPIO lookup flags. - */ -static void gpiod_parse_flags(struct gpio_desc *desc, unsigned long lflags) -{ - if (lflags & GPIO_ACTIVE_LOW) - set_bit(FLAG_ACTIVE_LOW, &desc->flags); - if (lflags & GPIO_OPEN_DRAIN) - set_bit(FLAG_OPEN_DRAIN, &desc->flags); - if (lflags & GPIO_OPEN_SOURCE) - set_bit(FLAG_OPEN_SOURCE, &desc->flags); -} /** * gpiod_configure_flags - helper function to configure a given GPIO * @desc: gpio whose value will be assigned * @con_id: function within the GPIO consumer + * @lflags: gpio_lookup_flags - returned from of_find_gpio() or + * of_get_gpio_hog() * @dflags: gpiod_flags - optional GPIO initialization flags * * Return 0 on success, -ENOENT if no GPIO has been assigned to the @@ -2601,10 +3092,17 @@ static void gpiod_parse_flags(struct gpio_desc *desc, unsigned long lflags) * occurred while trying to acquire the GPIO. */ static int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id, - enum gpiod_flags dflags) + unsigned long lflags, enum gpiod_flags dflags) { int status; + if (lflags & GPIO_ACTIVE_LOW) + set_bit(FLAG_ACTIVE_LOW, &desc->flags); + if (lflags & GPIO_OPEN_DRAIN) + set_bit(FLAG_OPEN_DRAIN, &desc->flags); + if (lflags & GPIO_OPEN_SOURCE) + set_bit(FLAG_OPEN_SOURCE, &desc->flags); + /* No particular flag request, return here... 
*/ if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) { pr_debug("no flags found for %s\n", con_id); @@ -2671,13 +3169,11 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, return desc; } - gpiod_parse_flags(desc, lookupflags); - status = gpiod_request(desc, con_id); if (status < 0) return ERR_PTR(status); - status = gpiod_configure_flags(desc, con_id, flags); + status = gpiod_configure_flags(desc, con_id, lookupflags, flags); if (status < 0) { dev_dbg(dev, "setup of GPIO %s failed\n", con_id); gpiod_put(desc); @@ -2733,6 +3229,10 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, if (IS_ERR(desc)) return desc; + ret = gpiod_request(desc, NULL); + if (ret) + return ERR_PTR(ret); + if (active_low) set_bit(FLAG_ACTIVE_LOW, &desc->flags); @@ -2743,10 +3243,6 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, set_bit(FLAG_OPEN_SOURCE, &desc->flags); } - ret = gpiod_request(desc, NULL); - if (ret) - return ERR_PTR(ret); - return desc; } EXPORT_SYMBOL_GPL(fwnode_get_named_gpiod); @@ -2799,8 +3295,6 @@ int gpiod_hog(struct gpio_desc *desc, const char *name, chip = gpiod_to_chip(desc); hwnum = gpio_chip_hwgpio(desc); - gpiod_parse_flags(desc, lflags); - local_desc = gpiochip_request_own_desc(chip, hwnum, name); if (IS_ERR(local_desc)) { status = PTR_ERR(local_desc); @@ -2809,7 +3303,7 @@ int gpiod_hog(struct gpio_desc *desc, const char *name, return status; } - status = gpiod_configure_flags(desc, name, dflags); + status = gpiod_configure_flags(desc, name, lflags, dflags); if (status < 0) { pr_err("setup of hog GPIO %s (chip %s, offset %d) failed, %d\n", name, chip->label, hwnum, status); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 252edba16e36..892d60fb225b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -421,29 +421,6 @@ static int acp_suspend(void *handle) static int acp_resume(void *handle) { - int i, ret; - struct acp_pm_domain *apd; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - /* return early if no ACP */ - if (!adev->acp.acp_genpd) - return 0; - - /* SMU block will power on ACP irrespective of ACP runtime status. - * Power off explicitly based on genpd ACP runtime status so that ACP - * hw and ACP-genpd status are in sync. 
- * 'suspend_power_off' represents "Power status before system suspend" - */ - if (adev->acp.acp_genpd->gpd.suspend_power_off == true) { - apd = container_of(&adev->acp.acp_genpd->gpd, - struct acp_pm_domain, gpd); - - for (i = 4; i >= 0 ; i--) { - ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i); - if (ret) - pr_err("ACP tile %d tile suspend failed\n", i); - } - } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 8943099eb135..cf6f49fc1c75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -909,7 +909,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device, struct cgs_acpi_method_argument *argument = NULL; uint32_t i, count; acpi_status status; - int result; + int result = 0; uint32_t func_no = 0xFFFFFFFF; handle = ACPI_HANDLE(&adev->pdev->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 66482b429458..6e920086af46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1535,7 +1535,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* Post card if necessary */ if (!amdgpu_card_posted(adev) || (adev->virtualization.is_virtual && - !adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN)) { + !(adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN))) { if (!adev->bios) { dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 40a23704a981..d851ea15059f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -447,7 +447,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.max_memory_clock = adev->pm.default_mclk * 10; } dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; - dev_info.num_rb_pipes = adev->gfx.config.num_rbs; + dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se * + adev->gfx.config.max_shader_engines; dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; dev_info._pad = 0; dev_info.ids_flags = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 589b36e8c5cf..0e13d80d2a95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -270,30 +270,28 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type state = 0; - long idx; + unsigned long idx; int ret; if (strlen(buf) == 1) adev->pp_force_state_enabled = false; - else { - ret = kstrtol(buf, 0, &idx); + else if (adev->pp_enabled) { + struct pp_states_info data; - if (ret) { + ret = kstrtoul(buf, 0, &idx); + if (ret || idx >= ARRAY_SIZE(data.states)) { count = -EINVAL; goto fail; } - if (adev->pp_enabled) { - struct pp_states_info data; - amdgpu_dpm_get_pp_num_states(adev, &data); - state = data.states[idx]; - /* only set user selected power states */ - if (state != POWER_STATE_TYPE_INTERNAL_BOOT && - state != POWER_STATE_TYPE_DEFAULT) { - amdgpu_dpm_dispatch_task(adev, - AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL); - adev->pp_force_state_enabled = true; - } + amdgpu_dpm_get_pp_num_states(adev, &data); + state = data.states[idx]; + /* only set user selected power states */ + if (state != 
POWER_STATE_TYPE_INTERNAL_BOOT && + state != POWER_STATE_TYPE_DEFAULT) { + amdgpu_dpm_dispatch_task(adev, + AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL); + adev->pp_force_state_enabled = true; } } fail: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e19520c4b4b6..d9c88d13f8db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1106,6 +1106,10 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) if (fences == 0 && handles == 0) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, false); + /* workaround for the UVD clock remaining high even + * when UVD DPM is disabled on Polaris10 */ + if (adev->asic_type == CHIP_POLARIS10) + amdgpu_asic_set_uvd_clocks(adev, 0, 0); } else { amdgpu_asic_set_uvd_clocks(adev, 0, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c index 13cdb01e9b45..bc56c8a181e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c @@ -156,3 +156,18 @@ u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap) return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } +void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev, u8 slave_addr, u8 line_number, u8 offset, u8 data) +{ + PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION args; + int index = GetIndexIntoMasterTable(COMMAND, ProcessI2cChannelTransaction); + + args.ucRegIndex = offset; + args.lpI2CDataOut = data; + args.ucFlag = 1; + args.ucI2CSpeed = TARGET_HW_I2C_CLOCK; + args.ucTransBytes = 1; + args.ucSlaveAddr = slave_addr; + args.ucLineNumber = line_number; + + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); +} diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h index d6128d9de56e..251aaf41f65d 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h @@ -27,5 +27,7 @@ int amdgpu_atombios_i2c_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, int num); u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap); +void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev, + u8 slave_addr, u8 line_number, u8 offset, u8 data); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 9f6f8669edc3..c2ef94511f70 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -28,6 +28,7 @@ #include "vid.h" #include "amdgpu_ucode.h" #include "amdgpu_atombios.h" +#include "atombios_i2c.h" #include "clearstate_vi.h" #include "gmc/gmc_8_2_d.h" @@ -47,6 +48,8 @@ #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" +#include "smu/smu_7_1_3_d.h" + #define GFX8_NUM_GFX_RINGS 1 #define GFX8_NUM_COMPUTE_RINGS 8 @@ -282,6 +285,7 @@ static const u32 golden_settings_polaris11_a11[] = mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210, + mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, }; static const u32 polaris11_golden_common_all[] = @@ -297,7 +301,8 @@ static const u32 polaris11_golden_common_all[] = static const u32 golden_settings_polaris10_a11[] = { mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, - mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208, + mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, + mmCB_HW_CONTROL_2, 0, 0x0f000000, mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, mmDB_DEBUG2, 0xf00fffff, 0x00000400, mmPA_SC_ENHANCE, 0xffffffff,
0x20000001, @@ -311,6 +316,7 @@ static const u32 golden_settings_polaris10_a11[] = mmTCC_CTRL, 0x00100000, 0xf31fff7f, mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, + mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, }; static const u32 polaris10_golden_common_all[] = @@ -692,6 +698,11 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) amdgpu_program_register_sequence(adev, polaris10_golden_common_all, (const u32)ARRAY_SIZE(polaris10_golden_common_all)); + WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); + if (adev->pdev->revision == 0xc7) { + amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD); + amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0); + } break; case CHIP_CARRIZO: amdgpu_program_register_sequence(adev, diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h index 32f3e345de08..3493da5c8f0e 100644 --- a/drivers/gpu/drm/amd/include/atombios.h +++ b/drivers/gpu/drm/amd/include/atombios.h @@ -5538,6 +5538,78 @@ typedef struct _ATOM_ASIC_PROFILING_INFO_V3_5 ULONG ulReserved[12]; }ATOM_ASIC_PROFILING_INFO_V3_5; +/* for Polars10/11 AVFS parameters */ +typedef struct _ATOM_ASIC_PROFILING_INFO_V3_6 +{ + ATOM_COMMON_TABLE_HEADER asHeader; + ULONG ulMaxVddc; + ULONG ulMinVddc; + USHORT usLkgEuseIndex; + UCHAR ucLkgEfuseBitLSB; + UCHAR ucLkgEfuseLength; + ULONG ulLkgEncodeLn_MaxDivMin; + ULONG ulLkgEncodeMax; + ULONG ulLkgEncodeMin; + EFUSE_LINEAR_FUNC_PARAM sRoFuse; + ULONG ulEvvDefaultVddc; + ULONG ulEvvNoCalcVddc; + ULONG ulSpeed_Model; + ULONG ulSM_A0; + ULONG ulSM_A1; + ULONG ulSM_A2; + ULONG ulSM_A3; + ULONG ulSM_A4; + ULONG ulSM_A5; + ULONG ulSM_A6; + ULONG ulSM_A7; + UCHAR ucSM_A0_sign; + UCHAR ucSM_A1_sign; + UCHAR ucSM_A2_sign; + UCHAR ucSM_A3_sign; + UCHAR ucSM_A4_sign; + UCHAR ucSM_A5_sign; + UCHAR ucSM_A6_sign; + UCHAR ucSM_A7_sign; + ULONG ulMargin_RO_a; + ULONG ulMargin_RO_b; + ULONG ulMargin_RO_c; + ULONG ulMargin_fixed; + ULONG ulMargin_Fmax_mean; + ULONG ulMargin_plat_mean; + ULONG ulMargin_Fmax_sigma; + ULONG ulMargin_plat_sigma; + ULONG ulMargin_DC_sigma; + ULONG ulLoadLineSlop; + ULONG ulaTDClimitPerDPM[8]; + ULONG ulaNoCalcVddcPerDPM[8]; + ULONG ulAVFS_meanNsigma_Acontant0; + ULONG ulAVFS_meanNsigma_Acontant1; + ULONG ulAVFS_meanNsigma_Acontant2; + USHORT usAVFS_meanNsigma_DC_tol_sigma; + USHORT usAVFS_meanNsigma_Platform_mean; + USHORT usAVFS_meanNsigma_Platform_sigma; + ULONG ulGB_VDROOP_TABLE_CKSOFF_a0; + ULONG ulGB_VDROOP_TABLE_CKSOFF_a1; + ULONG ulGB_VDROOP_TABLE_CKSOFF_a2; + ULONG ulGB_VDROOP_TABLE_CKSON_a0; + ULONG ulGB_VDROOP_TABLE_CKSON_a1; + ULONG ulGB_VDROOP_TABLE_CKSON_a2; + ULONG ulAVFSGB_FUSE_TABLE_CKSOFF_m1; + USHORT usAVFSGB_FUSE_TABLE_CKSOFF_m2; + ULONG ulAVFSGB_FUSE_TABLE_CKSOFF_b; + ULONG ulAVFSGB_FUSE_TABLE_CKSON_m1; + USHORT usAVFSGB_FUSE_TABLE_CKSON_m2; + ULONG ulAVFSGB_FUSE_TABLE_CKSON_b; + USHORT usMaxVoltage_0_25mv; + UCHAR ucEnableGB_VDROOP_TABLE_CKSOFF; + UCHAR ucEnableGB_VDROOP_TABLE_CKSON; + UCHAR ucEnableGB_FUSE_TABLE_CKSOFF; + UCHAR ucEnableGB_FUSE_TABLE_CKSON; + USHORT usPSM_Age_ComFactor; + UCHAR ucEnableApplyAVFS_CKS_OFF_Voltage; + UCHAR ucReserved; +}ATOM_ASIC_PROFILING_INFO_V3_6; + typedef struct _ATOM_SCLK_FCW_RANGE_ENTRY_V1{ ULONG ulMaxSclkFreq; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c index 586f73276226..92912ab20944 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c @@ -633,6 +633,8 
@@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr) data->vddci_control = FIJI_VOLTAGE_CONTROL_NONE; data->mvdd_control = FIJI_VOLTAGE_CONTROL_NONE; + data->force_pcie_gen = PP_PCIEGenInvalid; + if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) data->voltage_control = FIJI_VOLTAGE_CONTROL_BY_SVID2; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index fa208ada6892..efb77eda7508 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -306,10 +306,14 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr, { PHM_FUNC_CHECK(hwmgr); - if (hwmgr->hwmgr_func->store_cc6_data == NULL) + if (display_config == NULL) return -EINVAL; hwmgr->display_config = *display_config; + + if (hwmgr->hwmgr_func->store_cc6_data == NULL) + return -EINVAL; + /* to do: pass other display configuration in future */ if (hwmgr->hwmgr_func->store_cc6_data) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 1400bc420881..91e25f942d90 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -732,7 +732,7 @@ static int polaris10_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, table->Smio[level] |= data->mvdd_voltage_table.entries[level].smio_low; } - table->SmioMask2 = data->vddci_voltage_table.mask_low; + table->SmioMask2 = data->mvdd_voltage_table.mask_low; table->MvddLevelCount = (uint32_t) PP_HOST_TO_SMC_UL(count); } @@ -1296,7 +1296,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr, } mem_level->MclkFrequency = clock; - mem_level->StutterEnable = 0; mem_level->EnabledForThrottle = 1; mem_level->EnabledForActivity = 0; mem_level->UpHyst = 0; @@ -1304,7 +1303,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr, mem_level->VoltageDownHyst = 0; mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target; mem_level->StutterEnable = false; - mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; data->display_timing.num_existing_displays = info.display_count; @@ -1363,7 +1361,7 @@ static int polaris10_populate_all_memory_levels(struct pp_hwmgr *hwmgr) * a higher state by default such that we are not affected by * up threshold or MCLK DPM latency.
*/ - levels[0].ActivityLevel = (uint16_t)data->mclk_dpm0_activity_target; + levels[0].ActivityLevel = 0x1f; CONVERT_FROM_HOST_TO_SMC_US(levels[0].ActivityLevel); data->smc_state_table.MemoryDpmLevelCount = @@ -1424,22 +1422,19 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; - if (!data->sclk_dpm_key_disabled) { - /* Get MinVoltage and Frequency from DPM0, - * already converted to SMC_UL */ - sclk_frequency = data->dpm_table.sclk_table.dpm_levels[0].value; - result = polaris10_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_sclk, - table->ACPILevel.SclkFrequency, - &table->ACPILevel.MinVoltage, &mvdd); - PP_ASSERT_WITH_CODE((0 == result), - "Cannot find ACPI VDDC voltage value " - "in Clock Dependency Table", ); - } else { - sclk_frequency = data->vbios_boot_state.sclk_bootup_value; - table->ACPILevel.MinVoltage = - data->vbios_boot_state.vddc_bootup_value * VOLTAGE_SCALE; - } + + /* Get MinVoltage and Frequency from DPM0, + * already converted to SMC_UL */ + sclk_frequency = data->dpm_table.sclk_table.dpm_levels[0].value; + result = polaris10_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_sclk, + sclk_frequency, + &table->ACPILevel.MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "Cannot find ACPI VDDC voltage value " + "in Clock Dependency Table", + ); + result = polaris10_calculate_sclk_params(hwmgr, sclk_frequency, &(table->ACPILevel.SclkSetting)); PP_ASSERT_WITH_CODE(result == 0, "Error retrieving Engine Clock dividers from VBIOS.", return result); @@ -1464,24 +1459,18 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac); CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate); - if (!data->mclk_dpm_key_disabled) { - /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */ - table->MemoryACPILevel.MclkFrequency = - data->dpm_table.mclk_table.dpm_levels[0].value; - result = polaris10_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_mclk, - table->MemoryACPILevel.MclkFrequency, - &table->MemoryACPILevel.MinVoltage, &mvdd); - PP_ASSERT_WITH_CODE((0 == result), - "Cannot find ACPI VDDCI voltage value " - "in Clock Dependency Table", - ); - } else { - table->MemoryACPILevel.MclkFrequency = - data->vbios_boot_state.mclk_bootup_value; - table->MemoryACPILevel.MinVoltage = - data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE; - } + + /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */ + table->MemoryACPILevel.MclkFrequency = + data->dpm_table.mclk_table.dpm_levels[0].value; + result = polaris10_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_mclk, + table->MemoryACPILevel.MclkFrequency, + &table->MemoryACPILevel.MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "Cannot find ACPI VDDCI voltage value " + "in Clock Dependency Table", + ); us_mvdd = 0; if ((POLARIS10_VOLTAGE_CONTROL_NONE == data->mvdd_control) || @@ -1526,6 +1515,7 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = table_info->mm_dep_table; struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + uint32_t vddci; table->VceLevelCount = (uint8_t)(mm_table->count); table->VceBootLevel = 0; @@ -1535,9 +1525,18 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr, table->VceLevel[count].MinVoltage = 0; 
table->VceLevel[count].MinVoltage |= (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; + + if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->VceLevel[count].MinVoltage |= - ((mm_table->entries[count].vddc - data->vddc_vddci_delta) * - VOLTAGE_SCALE) << VDDCI_SHIFT; + (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT; /*retrieve divider value for VBIOS */ @@ -1566,6 +1565,7 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = table_info->mm_dep_table; struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + uint32_t vddci; table->SamuBootLevel = 0; table->SamuLevelCount = (uint8_t)(mm_table->count); @@ -1576,8 +1576,16 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr, table->SamuLevel[count].Frequency = mm_table->entries[count].samclock; table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; - table->SamuLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - - data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT; + + if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT; /* retrieve divider value for VBIOS */ @@ -1660,6 +1668,7 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = table_info->mm_dep_table; struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + uint32_t vddci; table->UvdLevelCount = (uint8_t)(mm_table->count); table->UvdBootLevel = 0; @@ -1670,8 +1679,16 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk; table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; - table->UvdLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - - data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT; + + if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT; /* retrieve divider value for VBIOS */ @@ -1692,8 +1709,8 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, 
CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency); CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency); CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage); - } + return result; } @@ -1761,12 +1778,9 @@ static int polaris10_populate_smc_initailial_state(struct pp_hwmgr *hwmgr) static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) { - uint32_t ro, efuse, efuse2, clock_freq, volt_without_cks, - volt_with_cks, value; - uint16_t clock_freq_u16; + uint32_t ro, efuse, volt_without_cks, volt_with_cks, value, max, min; struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); - uint8_t type, i, j, cks_setting, stretch_amount, stretch_amount2, - volt_offset = 0; + uint8_t i, stretch_amount, stretch_amount2, volt_offset = 0; struct phm_ppt_v1_information *table_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = @@ -1778,50 +1792,44 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) * if the part is SS or FF. if RO >= 1660MHz, part is FF. */ efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixSMU_EFUSE_0 + (146 * 4)); - efuse2 = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixSMU_EFUSE_0 + (148 * 4)); + ixSMU_EFUSE_0 + (67 * 4)); efuse &= 0xFF000000; efuse = efuse >> 24; - efuse2 &= 0xF; - if (efuse2 == 1) - ro = (2300 - 1350) * efuse / 255 + 1350; - else - ro = (2500 - 1000) * efuse / 255 + 1000; - - if (ro >= 1660) - type = 0; - else - type = 1; + if (hwmgr->chip_id == CHIP_POLARIS10) { + min = 1000; + max = 2300; + } else { + min = 1100; + max = 2100; + } - /* Populate Stretch amount */ - data->smc_state_table.ClockStretcherAmount = stretch_amount; + ro = efuse * (max -min)/255 + min; /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ for (i = 0; i < sclk_table->count; i++) { data->smc_state_table.Sclk_CKS_masterEn0_7 |= sclk_table->entries[i].cks_enable << i; - volt_without_cks = (uint32_t)((14041 * - (sclk_table->entries[i].clk/100) / 10000 + 3571 + 75 - ro) * 1000 / - (4026 - (13924 * (sclk_table->entries[i].clk/100) / 10000))); - volt_with_cks = (uint32_t)((13946 * - (sclk_table->entries[i].clk/100) / 10000 + 3320 + 45 - ro) * 1000 / - (3664 - (11454 * (sclk_table->entries[i].clk/100) / 10000))); + if (hwmgr->chip_id == CHIP_POLARIS10) { + volt_without_cks = (uint32_t)((2753594000U + (sclk_table->entries[i].clk/100) * 136418 -(ro - 70) * 1000000) / \ + (2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000)); + volt_with_cks = (uint32_t)((2797202000U + sclk_table->entries[i].clk/100 * 3232 - (ro - 65) * 1000000) / \ + (2522480 - sclk_table->entries[i].clk/100 * 115764/100)); + } else { + volt_without_cks = (uint32_t)((2416794800U + (sclk_table->entries[i].clk/100) * 1476925/10 -(ro - 50) * 1000000) / \ + (2625416 - (sclk_table->entries[i].clk/100) * (12586807/10000))); + volt_with_cks = (uint32_t)((2999656000U - sclk_table->entries[i].clk/100 * 392803 - (ro - 44) * 1000000) / \ + (3422454 - sclk_table->entries[i].clk/100 * (18886376/10000))); + } + if (volt_without_cks >= volt_with_cks) volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + - sclk_table->entries[i].cks_voffset) * 100 / 625) + 1); + sclk_table->entries[i].cks_voffset) * 100 + 624) / 625); + data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; } - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - STRETCH_ENABLE, 0x0); - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, 
CGS_IND_REG__SMC, PWR_CKS_ENABLE, - masterReset, 0x1); - /* PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, staticEnable, 0x1); */ - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - masterReset, 0x0); - + data->smc_state_table.LdoRefSel = (table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ? table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 6; /* Populate CKS Lookup Table */ if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5) stretch_amount2 = 0; @@ -1835,69 +1843,6 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) return -EINVAL); } - value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixPWR_CKS_CNTL); - value &= 0xFFC2FF87; - data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq = - polaris10_clock_stretcher_lookup_table[stretch_amount2][0]; - data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq = - polaris10_clock_stretcher_lookup_table[stretch_amount2][1]; - clock_freq_u16 = (uint16_t)(PP_SMC_TO_HOST_UL(data->smc_state_table. - GraphicsLevel[data->smc_state_table.GraphicsDpmLevelCount - 1].SclkSetting.SclkFrequency) / 100); - if (polaris10_clock_stretcher_lookup_table[stretch_amount2][0] < clock_freq_u16 - && polaris10_clock_stretcher_lookup_table[stretch_amount2][1] > clock_freq_u16) { - /* Program PWR_CKS_CNTL. CKS_USE_FOR_LOW_FREQ */ - value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 16; - /* Program PWR_CKS_CNTL. CKS_LDO_REFSEL */ - value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][2]) << 18; - /* Program PWR_CKS_CNTL. CKS_STRETCH_AMOUNT */ - value |= (polaris10_clock_stretch_amount_conversion - [polaris10_clock_stretcher_lookup_table[stretch_amount2][3]] - [stretch_amount]) << 3; - } - CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq); - CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq); - data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting = - polaris10_clock_stretcher_lookup_table[stretch_amount2][2] & 0x7F; - data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting |= - (polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 7; - - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixPWR_CKS_CNTL, value); - - /* Populate DDT Lookup Table */ - for (i = 0; i < 4; i++) { - /* Assign the minimum and maximum VID stored - * in the last row of Clock Stretcher Voltage Table. - */ - data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].minVID = - (uint8_t) polaris10_clock_stretcher_ddt_table[type][i][2]; - data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].maxVID = - (uint8_t) polaris10_clock_stretcher_ddt_table[type][i][3]; - /* Loop through each SCLK and check the frequency - * to see if it lies within the frequency for clock stretcher. - */ - for (j = 0; j < data->smc_state_table.GraphicsDpmLevelCount; j++) { - cks_setting = 0; - clock_freq = PP_SMC_TO_HOST_UL( - data->smc_state_table.GraphicsLevel[j].SclkSetting.SclkFrequency); - /* Check the allowed frequency against the sclk level[j]. - * Sclk's endianness has already been converted, - * and it's in 10Khz unit, - * as opposed to Data table, which is in Mhz unit. 
- */ - if (clock_freq >= (polaris10_clock_stretcher_ddt_table[type][i][0]) * 100) { - cks_setting |= 0x2; - if (clock_freq < (polaris10_clock_stretcher_ddt_table[type][i][1]) * 100) - cks_setting |= 0x1; - } - data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting - |= cks_setting << (j * 2); - } - CONVERT_FROM_HOST_TO_SMC_US( - data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting); - } - value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL); value &= 0xFFFFFFFE; cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value); @@ -1956,6 +1901,90 @@ static int polaris10_populate_vr_config(struct pp_hwmgr *hwmgr, return 0; } + +int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) +{ + struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + SMU74_Discrete_DpmTable *table = &(data->smc_state_table); + int result = 0; + struct pp_atom_ctrl__avfs_parameters avfs_params = {0}; + AVFS_meanNsigma_t AVFS_meanNsigma = { {0} }; + AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} }; + uint32_t tmp, i; + struct pp_smumgr *smumgr = hwmgr->smumgr; + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend); + + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)hwmgr->pptable; + struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = + table_info->vdd_dep_on_sclk; + + + if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) + return result; + + result = atomctrl_get_avfs_information(hwmgr, &avfs_params); + + if (0 == result) { + table->BTCGB_VDROOP_TABLE[0].a0 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0); + table->BTCGB_VDROOP_TABLE[0].a1 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1); + table->BTCGB_VDROOP_TABLE[0].a2 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2); + table->BTCGB_VDROOP_TABLE[1].a0 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0); + table->BTCGB_VDROOP_TABLE[1].a1 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1); + table->BTCGB_VDROOP_TABLE[1].a2 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2); + table->AVFSGB_VDROOP_TABLE[0].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1); + table->AVFSGB_VDROOP_TABLE[0].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2); + table->AVFSGB_VDROOP_TABLE[0].b = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b); + table->AVFSGB_VDROOP_TABLE[0].m1_shift = 24; + table->AVFSGB_VDROOP_TABLE[0].m2_shift = 12; + table->AVFSGB_VDROOP_TABLE[1].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1); + table->AVFSGB_VDROOP_TABLE[1].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2); + table->AVFSGB_VDROOP_TABLE[1].b = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b); + table->AVFSGB_VDROOP_TABLE[1].m1_shift = 24; + table->AVFSGB_VDROOP_TABLE[1].m2_shift = 12; + table->MaxVoltage = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv); + AVFS_meanNsigma.Aconstant[0] = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0); + AVFS_meanNsigma.Aconstant[1] = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1); + AVFS_meanNsigma.Aconstant[2] = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2); + AVFS_meanNsigma.DC_tol_sigma = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma); + AVFS_meanNsigma.Platform_mean = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean); + 
AVFS_meanNsigma.PSM_Age_CompFactor = PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor); + AVFS_meanNsigma.Platform_sigma = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma); + + for (i = 0; i < NUM_VFT_COLUMNS; i++) { + AVFS_meanNsigma.Static_Voltage_Offset[i] = (uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625); + AVFS_SclkOffset.Sclk_Offset[i] = PP_HOST_TO_SMC_US((uint16_t)(sclk_table->entries[i].sclk_offset) / 100); + } + + result = polaris10_read_smc_sram_dword(smumgr, + SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsMeanNSigma), + &tmp, data->sram_end); + + polaris10_copy_bytes_to_smc(smumgr, + tmp, + (uint8_t *)&AVFS_meanNsigma, + sizeof(AVFS_meanNsigma_t), + data->sram_end); + + result = polaris10_read_smc_sram_dword(smumgr, + SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsSclkOffsetTable), + &tmp, data->sram_end); + polaris10_copy_bytes_to_smc(smumgr, + tmp, + (uint8_t *)&AVFS_SclkOffset, + sizeof(AVFS_Sclk_Offset_t), + data->sram_end); + + data->avfs_vdroop_override_setting = (avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT); + data->apply_avfs_cks_off_voltage = (avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1) ? true : false; + } + return result; +} + + /** * Initializes the SMC table and uploads it * @@ -2056,6 +2085,10 @@ static int polaris10_init_smc_table(struct pp_hwmgr *hwmgr) "Failed to populate Clock Stretcher Data Table!", return result); } + + result = polaris10_populate_avfs_parameters(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, "Failed to populate AVFS Parameters!", return result;); + table->CurrSclkPllRange = 0xff; table->GraphicsVoltageChangeEnable = 1; table->GraphicsThermThrottleEnable = 1; @@ -2252,6 +2285,9 @@ static int polaris10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr) static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr) { struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + uint32_t soft_register_value = 0; + uint32_t handshake_disables_offset = data->soft_regs_start + + offsetof(SMU74_SoftRegisters, HandshakeDisables); /* enable SCLK dpm */ if (!data->sclk_dpm_key_disabled) @@ -2262,6 +2298,12 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr) /* enable MCLK dpm */ if (0 == data->mclk_dpm_key_disabled) { +/* Disable UVD - SMU handshake for MCLK. 
*/ + soft_register_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, handshake_disables_offset); + soft_register_value |= SMU7_UVD_MCLK_HANDSHAKE_DISABLE; + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + handshake_disables_offset, soft_register_value); PP_ASSERT_WITH_CODE( (0 == smum_send_msg_to_smc(hwmgr->smumgr, @@ -2269,7 +2311,6 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr) "Failed to enable MCLK DPM during DPM Start Function!", return -1); - PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x5); @@ -2471,6 +2512,8 @@ int polaris10_enable_dpm_tasks(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to enable VR hot GPIO interrupt!", result = tmp_result); + smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay); + tmp_result = polaris10_enable_sclk_control(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to enable SCLK control!", result = tmp_result); @@ -2606,6 +2649,7 @@ int polaris10_set_features_platform_caps(struct pp_hwmgr *hwmgr) phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_FanSpeedInTableIsRPM); + if (hwmgr->chip_id == CHIP_POLARIS11) phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SPLLShutdownSupport); @@ -2638,7 +2682,7 @@ static int polaris10_get_evv_voltages(struct pp_hwmgr *hwmgr) { struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); uint16_t vv_id; - uint16_t vddc = 0; + uint32_t vddc = 0; uint16_t i, j; uint32_t sclk = 0; struct phm_ppt_v1_information *table_info = @@ -2669,8 +2713,9 @@ static int polaris10_get_evv_voltages(struct pp_hwmgr *hwmgr) continue); - /* need to make sure vddc is less than 2v or else, it could burn the ASIC. */ - PP_ASSERT_WITH_CODE((vddc < 2000 && vddc != 0), + /* need to make sure vddc is less than 2v or else, it could burn the ASIC. 
+ * real voltage level in unit of 0.01mv */ + PP_ASSERT_WITH_CODE((vddc < 200000 && vddc != 0), "Invalid VDDC value", result = -EINVAL;); /* the voltage should not be zero nor equal to leakage ID */ @@ -2896,6 +2941,31 @@ static int polaris10_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) return 0; } +int polaris10_patch_voltage_workaround(struct pp_hwmgr *hwmgr) +{ + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_clock_voltage_dependency_table *dep_mclk_table = + table_info->vdd_dep_on_mclk; + struct phm_ppt_v1_voltage_lookup_table *lookup_table = + table_info->vddc_lookup_table; + uint32_t i; + + if (hwmgr->chip_id == CHIP_POLARIS10 && hwmgr->hw_revision == 0xC7) { + if (lookup_table->entries[dep_mclk_table->entries[dep_mclk_table->count-1].vddInd].us_vdd >= 1000) + return 0; + + for (i = 0; i < lookup_table->count; i++) { + if (lookup_table->entries[i].us_vdd < 0xff01 && lookup_table->entries[i].us_vdd >= 1000) { + dep_mclk_table->entries[dep_mclk_table->count-1].vddInd = (uint8_t) i; + return 0; + } + } + } + return 0; +} + + int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) { struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); @@ -2938,6 +3008,11 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) data->vddci_control = POLARIS10_VOLTAGE_CONTROL_NONE; data->mvdd_control = POLARIS10_VOLTAGE_CONTROL_NONE; + data->enable_tdc_limit_feature = true; + data->enable_pkg_pwr_tracking_feature = true; + data->force_pcie_gen = PP_PCIEGenInvalid; + data->mclk_stutter_mode_threshold = 40000; + if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) data->voltage_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2; @@ -2962,8 +3037,13 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) data->vddci_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2; } + if (table_info->cac_dtp_table->usClockStretchAmount != 0) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ClockStretcher); + polaris10_set_features_platform_caps(hwmgr); + polaris10_patch_voltage_workaround(hwmgr); polaris10_init_dpm_defaults(hwmgr); /* Get leakage voltage based on leakage ID. */ @@ -4333,6 +4413,15 @@ static int polaris10_notify_link_speed_change_after_state_change( return 0; } +static int polaris10_notify_smc_display(struct pp_hwmgr *hwmgr) +{ + struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + (PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2); + return (smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay) == 0) ? 0 : -EINVAL; +} + static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input) { int tmp_result, result = 0; @@ -4381,6 +4470,11 @@ static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *i "Failed to program memory timing parameters!", result = tmp_result); + tmp_result = polaris10_notify_smc_display(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to notify smc display settings!", + result = tmp_result); + tmp_result = polaris10_unfreeze_sclk_mclk_dpm(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to unfreeze SCLK MCLK DPM!", @@ -4415,6 +4509,7 @@ static int polaris10_set_max_fan_pwm_output(struct pp_hwmgr *hwmgr, uint16_t us_ PPSMC_MSG_SetFanPwmMax, us_max_fan_pwm); } + int polaris10_notify_smc_display_change(struct pp_hwmgr *hwmgr, bool has_display) { PPSMC_Msg msg = has_display ? 
(PPSMC_Msg)PPSMC_HasDisplay : (PPSMC_Msg)PPSMC_NoDisplay; @@ -4434,8 +4529,6 @@ int polaris10_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwm if (num_active_displays > 1) /* to do && (pHwMgr->pPECI->displayConfiguration.bMultiMonitorInSync != TRUE)) */ polaris10_notify_smc_display_change(hwmgr, false); - else - polaris10_notify_smc_display_change(hwmgr, true); return 0; } @@ -4476,6 +4569,8 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr) frame_time_in_us = 1000000 / refresh_rate; pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us; + data->frame_time_x2 = frame_time_in_us * 2 / 100; + display_gap2 = pre_vbi_time_in_us * (ref_clock / 100); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL2, display_gap2); @@ -4484,8 +4579,6 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr) cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, data->soft_regs_start + offsetof(SMU74_SoftRegisters, VBlankTimeout), (frame_time_in_us - pre_vbi_time_in_us)); - polaris10_notify_smc_display_change(hwmgr, num_active_displays != 0); - return 0; } @@ -4597,7 +4690,7 @@ int polaris10_upload_mc_firmware(struct pp_hwmgr *hwmgr) return 0; } - data->need_long_memory_training = true; + data->need_long_memory_training = false; /* * PPMCME_FirmwareDescriptorEntry *pfd = NULL; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h index beedf35cbfa6..afc3434822d1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h @@ -312,6 +312,10 @@ struct polaris10_hwmgr { /* soft pptable for re-uploading into smu */ void *soft_pp_table; + + uint32_t avfs_vdroop_override_setting; + bool apply_avfs_cks_off_voltage; + uint32_t frame_time_x2; }; /* To convert to Q8.8 format for firmware */ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c index aba167f7d167..b206632d4650 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c @@ -625,10 +625,14 @@ static int tf_polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr, int ret; struct pp_smumgr *smumgr = (struct pp_smumgr *)(hwmgr->smumgr); struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend); + struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); - if (smu_data->avfs.avfs_btc_status != AVFS_BTC_ENABLEAVFS) + if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) return 0; + ret = smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetGBDroopSettings, data->avfs_vdroop_override_setting); + ret = (smum_send_msg_to_smc(smumgr, PPSMC_MSG_EnableAvfs) == 0) ? 0 : -1; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c index 58742e0d1492..a3c38bbd1e94 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c @@ -44,6 +44,20 @@ bool acpi_atcs_functions_supported(void *device, uint32_t index) return result == 0 ? 
(output_buf.function_bits & (1 << (index - 1))) != 0 : false; } +bool acpi_atcs_notify_pcie_device_ready(void *device) +{ + int32_t temp_buffer = 1; + + return cgs_call_acpi_method(device, CGS_ACPI_METHOD_ATCS, + ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION, + &temp_buffer, + NULL, + 0, + sizeof(temp_buffer), + 0); +} + + int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise) { struct atcs_pref_req_input atcs_input; @@ -52,7 +66,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise) int result; struct cgs_system_info info = {0}; - if (!acpi_atcs_functions_supported(device, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST)) + if( 0 != acpi_atcs_notify_pcie_device_ready(device)) return -EINVAL; info.size = sizeof(struct cgs_system_info); @@ -77,7 +91,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise) ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST, &atcs_input, &atcs_output, - 0, + 1, sizeof(atcs_input), sizeof(atcs_output)); if (result != 0) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c index da9f5f1b6dc2..90b35c5c10a4 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c @@ -1256,7 +1256,7 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock, } int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type, - uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage) + uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage) { int result; @@ -1274,7 +1274,7 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_ if (0 != result) return result; - *voltage = get_voltage_info_param_space.usVoltageLevel; + *voltage = ((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)(&get_voltage_info_param_space))->ulVoltageLevel; return result; } @@ -1302,3 +1302,46 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr return 0; } + +int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param) +{ + ATOM_ASIC_PROFILING_INFO_V3_6 *profile = NULL; + + if (param == NULL) + return -EINVAL; + + profile = (ATOM_ASIC_PROFILING_INFO_V3_6 *) + cgs_atom_get_data_table(hwmgr->device, + GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo), + NULL, NULL, NULL); + if (!profile) + return -1; + + param->ulAVFS_meanNsigma_Acontant0 = profile->ulAVFS_meanNsigma_Acontant0; + param->ulAVFS_meanNsigma_Acontant1 = profile->ulAVFS_meanNsigma_Acontant1; + param->ulAVFS_meanNsigma_Acontant2 = profile->ulAVFS_meanNsigma_Acontant2; + param->usAVFS_meanNsigma_DC_tol_sigma = profile->usAVFS_meanNsigma_DC_tol_sigma; + param->usAVFS_meanNsigma_Platform_mean = profile->usAVFS_meanNsigma_Platform_mean; + param->usAVFS_meanNsigma_Platform_sigma = profile->usAVFS_meanNsigma_Platform_sigma; + param->ulGB_VDROOP_TABLE_CKSOFF_a0 = profile->ulGB_VDROOP_TABLE_CKSOFF_a0; + param->ulGB_VDROOP_TABLE_CKSOFF_a1 = profile->ulGB_VDROOP_TABLE_CKSOFF_a1; + param->ulGB_VDROOP_TABLE_CKSOFF_a2 = profile->ulGB_VDROOP_TABLE_CKSOFF_a2; + param->ulGB_VDROOP_TABLE_CKSON_a0 = profile->ulGB_VDROOP_TABLE_CKSON_a0; + param->ulGB_VDROOP_TABLE_CKSON_a1 = profile->ulGB_VDROOP_TABLE_CKSON_a1; + param->ulGB_VDROOP_TABLE_CKSON_a2 = profile->ulGB_VDROOP_TABLE_CKSON_a2; + param->ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = profile->ulAVFSGB_FUSE_TABLE_CKSOFF_m1; + param->usAVFSGB_FUSE_TABLE_CKSOFF_m2 = profile->usAVFSGB_FUSE_TABLE_CKSOFF_m2; + 
param->ulAVFSGB_FUSE_TABLE_CKSOFF_b = profile->ulAVFSGB_FUSE_TABLE_CKSOFF_b; + param->ulAVFSGB_FUSE_TABLE_CKSON_m1 = profile->ulAVFSGB_FUSE_TABLE_CKSON_m1; + param->usAVFSGB_FUSE_TABLE_CKSON_m2 = profile->usAVFSGB_FUSE_TABLE_CKSON_m2; + param->ulAVFSGB_FUSE_TABLE_CKSON_b = profile->ulAVFSGB_FUSE_TABLE_CKSON_b; + param->usMaxVoltage_0_25mv = profile->usMaxVoltage_0_25mv; + param->ucEnableGB_VDROOP_TABLE_CKSOFF = profile->ucEnableGB_VDROOP_TABLE_CKSOFF; + param->ucEnableGB_VDROOP_TABLE_CKSON = profile->ucEnableGB_VDROOP_TABLE_CKSON; + param->ucEnableGB_FUSE_TABLE_CKSOFF = profile->ucEnableGB_FUSE_TABLE_CKSOFF; + param->ucEnableGB_FUSE_TABLE_CKSON = profile->ucEnableGB_FUSE_TABLE_CKSON; + param->usPSM_Age_ComFactor = profile->usPSM_Age_ComFactor; + param->ucEnableApplyAVFS_CKS_OFF_Voltage = profile->ucEnableApplyAVFS_CKS_OFF_Voltage; + + return 0; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h index d24ebb566905..1e35a9625baf 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h @@ -250,6 +250,35 @@ struct pp_atomctrl_gpio_pin_assignment { }; typedef struct pp_atomctrl_gpio_pin_assignment pp_atomctrl_gpio_pin_assignment; +struct pp_atom_ctrl__avfs_parameters { + uint32_t ulAVFS_meanNsigma_Acontant0; + uint32_t ulAVFS_meanNsigma_Acontant1; + uint32_t ulAVFS_meanNsigma_Acontant2; + uint16_t usAVFS_meanNsigma_DC_tol_sigma; + uint16_t usAVFS_meanNsigma_Platform_mean; + uint16_t usAVFS_meanNsigma_Platform_sigma; + uint32_t ulGB_VDROOP_TABLE_CKSOFF_a0; + uint32_t ulGB_VDROOP_TABLE_CKSOFF_a1; + uint32_t ulGB_VDROOP_TABLE_CKSOFF_a2; + uint32_t ulGB_VDROOP_TABLE_CKSON_a0; + uint32_t ulGB_VDROOP_TABLE_CKSON_a1; + uint32_t ulGB_VDROOP_TABLE_CKSON_a2; + uint32_t ulAVFSGB_FUSE_TABLE_CKSOFF_m1; + uint16_t usAVFSGB_FUSE_TABLE_CKSOFF_m2; + uint32_t ulAVFSGB_FUSE_TABLE_CKSOFF_b; + uint32_t ulAVFSGB_FUSE_TABLE_CKSON_m1; + uint16_t usAVFSGB_FUSE_TABLE_CKSON_m2; + uint32_t ulAVFSGB_FUSE_TABLE_CKSON_b; + uint16_t usMaxVoltage_0_25mv; + uint8_t ucEnableGB_VDROOP_TABLE_CKSOFF; + uint8_t ucEnableGB_VDROOP_TABLE_CKSON; + uint8_t ucEnableGB_FUSE_TABLE_CKSOFF; + uint8_t ucEnableGB_FUSE_TABLE_CKSON; + uint16_t usPSM_Age_ComFactor; + uint8_t ucEnableApplyAVFS_CKS_OFF_Voltage; + uint8_t ucReserved; +}; + extern bool atomctrl_get_pp_assign_pin(struct pp_hwmgr *hwmgr, const uint32_t pinId, pp_atomctrl_gpio_pin_assignment *gpio_pin_assignment); extern int atomctrl_get_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage); extern uint32_t atomctrl_get_mpll_reference_clock(struct pp_hwmgr *hwmgr); @@ -276,7 +305,10 @@ extern int atomctrl_get_engine_pll_dividers_ai(struct pp_hwmgr *hwmgr, uint32_t extern int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock, uint8_t level); extern int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type, - uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage); + uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage); extern int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl_sclk_range_table *table); + +extern int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c index d27e8c40602a..5d0f655bf160 100644 --- 
a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c @@ -1302,7 +1302,7 @@ static int tonga_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, table->Smio[count] |= data->mvdd_voltage_table.entries[count].smio_low; } - table->SmioMask2 = data->vddci_voltage_table.mask_low; + table->SmioMask2 = data->mvdd_voltage_table.mask_low; CONVERT_FROM_HOST_TO_SMC_UL(table->MvddLevelCount); } @@ -4489,6 +4489,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr) data->vdd_ci_control = TONGA_VOLTAGE_CONTROL_NONE; data->vdd_gfx_control = TONGA_VOLTAGE_CONTROL_NONE; data->mvdd_control = TONGA_VOLTAGE_CONTROL_NONE; + data->force_pcie_gen = PP_PCIEGenInvalid; if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) { diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c index 671fdb4d615a..dccc859f638c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c @@ -302,7 +302,7 @@ static int init_dpm_2_parameters( (((unsigned long)powerplay_table) + le16_to_cpu(powerplay_table->usPPMTableOffset)); if (0 != powerplay_table->usPPMTableOffset) { - if (1 == get_platform_power_management_table(hwmgr, atom_ppm_table)) { + if (get_platform_power_management_table(hwmgr, atom_ppm_table) == 0) { phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_EnablePlatformPowerManagement); } diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 28f571449495..77e8e33d5870 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -411,6 +411,8 @@ struct phm_cac_tdp_table { uint8_t ucVr_I2C_Line; uint8_t ucPlx_I2C_address; uint8_t ucPlx_I2C_Line; + uint32_t usBoostPowerLimit; + uint8_t ucCKS_LDO_REFSEL; }; struct phm_ppm_table { diff --git a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h index 0c6a413eaa5b..b8f4b73c322e 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h @@ -27,6 +27,7 @@ #pragma pack(push, 1) +#define PPSMC_MSG_SetGBDroopSettings ((uint16_t) 0x305) #define PPSMC_SWSTATE_FLAG_DC 0x01 #define PPSMC_SWSTATE_FLAG_UVD 0x02 @@ -391,6 +392,8 @@ typedef uint16_t PPSMC_Result; #define PPSMC_MSG_SetGpuPllDfsForSclk ((uint16_t) 0x300) #define PPSMC_MSG_Didt_Block_Function ((uint16_t) 0x301) +#define PPSMC_MSG_SetVBITimeout ((uint16_t) 0x306) + #define PPSMC_MSG_SecureSRBMWrite ((uint16_t) 0x600) #define PPSMC_MSG_SecureSRBMRead ((uint16_t) 0x601) #define PPSMC_MSG_SetAddress ((uint16_t) 0x800) diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h index 3bd5e69b9045..3df5de2cdab0 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h @@ -26,3 +26,4 @@ extern bool acpi_atcs_functions_supported(void *device, extern int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise); +extern bool acpi_atcs_notify_pcie_device_ready(void *device); diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74.h b/drivers/gpu/drm/amd/powerplay/inc/smu74.h index 1a12d85b8e97..fd10a9fa843d 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu74.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu74.h @@ -34,6 +34,30 @@ #define SMU__NUM_LCLK_DPM_LEVELS 8 
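/*
 * The EXP_M1_x/EXP_M2_x/EXP_B_x triples added below read like per-segment
 * (m1, m2, b) coefficient sets for the AVFS curve fits, mirroring the
 * SMU_QuadraticCoeffs layout that moves into smu74_discrete.h later in
 * this diff. A minimal sketch of how such a triple might be evaluated;
 * the 64-bit widening and the shift handling are assumptions, not taken
 * from the SMU firmware spec:
 *
 *	static inline int64_t quad_eval(int64_t x, int32_t m1, int16_t m2,
 *					uint32_t b, uint8_t m1_shift,
 *					uint8_t m2_shift)
 *	{
 *		return ((m1 * x * x) >> m1_shift) +
 *		       (((int64_t)m2 * x) >> m2_shift) + b;
 *	}
 */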
#define SMU__NUM_PCIE_DPM_LEVELS 8 +#define EXP_M1 35 +#define EXP_M2 92821 +#define EXP_B 66629747 + +#define EXP_M1_1 365 +#define EXP_M2_1 658700 +#define EXP_B_1 305506134 + +#define EXP_M1_2 189 +#define EXP_M2_2 379692 +#define EXP_B_2 194609469 + +#define EXP_M1_3 99 +#define EXP_M2_3 217915 +#define EXP_B_3 122255994 + +#define EXP_M1_4 51 +#define EXP_M2_4 122643 +#define EXP_B_4 74893384 + +#define EXP_M1_5 423 +#define EXP_M2_5 1103326 +#define EXP_B_5 728122621 + enum SID_OPTION { SID_OPTION_HI, SID_OPTION_LO, @@ -548,20 +572,20 @@ struct SMU74_Firmware_Header { uint32_t CacConfigTable; uint32_t CacStatusTable; - uint32_t mcRegisterTable; - uint32_t mcArbDramTimingTable; - - - uint32_t PmFuseTable; uint32_t Globals; uint32_t ClockStretcherTable; uint32_t VftTable; - uint32_t Reserved[21]; + uint32_t Reserved1; + uint32_t AvfsTable; + uint32_t AvfsCksOffGbvTable; + uint32_t AvfsMeanNSigma; + uint32_t AvfsSclkOffsetTable; + uint32_t Reserved[16]; uint32_t Signature; }; @@ -701,8 +725,6 @@ VR Config info is contained in dpmTable.VRConfig */ struct SMU_ClockStretcherDataTableEntry { uint8_t minVID; uint8_t maxVID; - - uint16_t setting; }; typedef struct SMU_ClockStretcherDataTableEntry SMU_ClockStretcherDataTableEntry; @@ -769,6 +791,43 @@ struct VFT_TABLE_t { typedef struct VFT_TABLE_t VFT_TABLE_t; +/* Total margin, root mean square of Fmax + DC + Platform */ +struct AVFS_Margin_t { + VFT_CELL_t Cell[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_Margin_t AVFS_Margin_t; + +#define BTCGB_VDROOP_TABLE_MAX_ENTRIES 2 +#define AVFSGB_VDROOP_TABLE_MAX_ENTRIES 2 + +struct GB_VDROOP_TABLE_t { + int32_t a0; + int32_t a1; + int32_t a2; + uint32_t spare; +}; +typedef struct GB_VDROOP_TABLE_t GB_VDROOP_TABLE_t; + +struct AVFS_CksOff_Gbv_t { + VFT_CELL_t Cell[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_CksOff_Gbv_t AVFS_CksOff_Gbv_t; + +struct AVFS_meanNsigma_t { + uint32_t Aconstant[3]; + uint16_t DC_tol_sigma; + uint16_t Platform_mean; + uint16_t Platform_sigma; + uint16_t PSM_Age_CompFactor; + uint8_t Static_Voltage_Offset[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_meanNsigma_t AVFS_meanNsigma_t; + +struct AVFS_Sclk_Offset_t { + uint16_t Sclk_Offset[8]; +}; +typedef struct AVFS_Sclk_Offset_t AVFS_Sclk_Offset_t; + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h index 0dfe82336dc7..899d6d8108c2 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h @@ -223,6 +223,16 @@ struct SMU74_Discrete_StateInfo { typedef struct SMU74_Discrete_StateInfo SMU74_Discrete_StateInfo; +struct SMU_QuadraticCoeffs { + int32_t m1; + uint32_t b; + + int16_t m2; + uint8_t m1_shift; + uint8_t m2_shift; +}; +typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs; + struct SMU74_Discrete_DpmTable { SMU74_PIDController GraphicsPIDController; @@ -258,7 +268,15 @@ struct SMU74_Discrete_DpmTable { uint8_t ThermOutPolarity; uint8_t ThermOutMode; uint8_t BootPhases; - uint32_t Reserved[4]; + + uint8_t VRHotLevel; + uint8_t LdoRefSel; + uint8_t Reserved1[2]; + uint16_t FanStartTemperature; + uint16_t FanStopTemperature; + uint16_t MaxVoltage; + uint16_t Reserved2; + uint32_t Reserved[1]; SMU74_Discrete_GraphicsLevel GraphicsLevel[SMU74_MAX_LEVELS_GRAPHICS]; SMU74_Discrete_MemoryLevel MemoryACPILevel; @@ -347,6 +365,8 @@ struct SMU74_Discrete_DpmTable { uint32_t CurrSclkPllRange; sclkFcwRange_t SclkFcwRangeTable[NUM_SCLK_RANGE]; + GB_VDROOP_TABLE_t 
BTCGB_VDROOP_TABLE[BTCGB_VDROOP_TABLE_MAX_ENTRIES]; + SMU_QuadraticCoeffs AVFSGB_VDROOP_TABLE[AVFSGB_VDROOP_TABLE_MAX_ENTRIES]; }; typedef struct SMU74_Discrete_DpmTable SMU74_Discrete_DpmTable; @@ -550,16 +570,6 @@ struct SMU7_AcpiScoreboard { typedef struct SMU7_AcpiScoreboard SMU7_AcpiScoreboard; -struct SMU_QuadraticCoeffs { - int32_t m1; - uint32_t b; - - int16_t m2; - uint8_t m1_shift; - uint8_t m2_shift; -}; -typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs; - struct SMU74_Discrete_PmFuses { uint8_t BapmVddCVidHiSidd[8]; uint8_t BapmVddCVidLoSidd[8]; @@ -821,6 +831,17 @@ typedef struct SMU7_GfxCuPgScoreboard SMU7_GfxCuPgScoreboard; #define DB_PCC_SHIFT 26 #define DB_EDC_SHIFT 27 +#define BTCGB0_Vdroop_Enable_MASK 0x1 +#define BTCGB1_Vdroop_Enable_MASK 0x2 +#define AVFSGB0_Vdroop_Enable_MASK 0x4 +#define AVFSGB1_Vdroop_Enable_MASK 0x8 + +#define BTCGB0_Vdroop_Enable_SHIFT 0 +#define BTCGB1_Vdroop_Enable_SHIFT 1 +#define AVFSGB0_Vdroop_Enable_SHIFT 2 +#define AVFSGB1_Vdroop_Enable_SHIFT 3 + + #pragma pack(pop) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 043b6ac09d5f..5dba7c509710 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -52,19 +52,18 @@ static const SMU74_Discrete_GraphicsLevel avfs_graphics_level_polaris10[8] = { /* Min pcie DeepSleep Activity CgSpll CgSpll CcPwr CcPwr Sclk Enabled Enabled Voltage Power */ /* Voltage, DpmLevel, DivId, Level, FuncCntl3, FuncCntl4, DynRm, DynRm1 Did, Padding,ForActivity, ForThrottle, UpHyst, DownHyst, DownHyst, Throttle */ - { 0x3c0fd047, 0x00, 0x03, 0x1e00, 0x00200410, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x30750000, 0, 0, 0, 0, 0, 0, 0 } }, - { 0xa00fd047, 0x01, 0x04, 0x1e00, 0x00800510, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x409c0000, 0, 0, 0, 0, 0, 0, 0 } }, - { 0x0410d047, 0x01, 0x00, 0x1e00, 0x00600410, 0x87020000, 0, 0, 0x0e, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x50c30000, 0, 0, 0, 0, 0, 0, 0 } }, - { 0x6810d047, 0x01, 0x00, 0x1e00, 0x00800410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x60ea0000, 0, 0, 0, 0, 0, 0, 0 } }, - { 0xcc10d047, 0x01, 0x00, 0x1e00, 0x00e00410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xe8fd0000, 0, 0, 0, 0, 0, 0, 0 } }, - { 0x3011d047, 0x01, 0x00, 0x1e00, 0x00400510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x70110100, 0, 0, 0, 0, 0, 0, 0 } }, - { 0x9411d047, 0x01, 0x00, 0x1e00, 0x00a00510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xf8240100, 0, 0, 0, 0, 0, 0, 0 } }, - { 0xf811d047, 0x01, 0x00, 0x1e00, 0x00000610, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x80380100, 0, 0, 0, 0, 0, 0, 0 } } + { 0x100ea446, 0x00, 0x03, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x30750000, 0x3000, 0, 0x2600, 0, 0, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } }, + { 0x400ea446, 0x01, 0x04, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x409c0000, 0x2000, 0, 0x1e00, 1, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } }, + { 0x740ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x50c30000, 0x2800, 0, 0x2000, 1, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } }, + { 0xa40ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x60ea0000, 0x3000, 0, 0x2600, 1, 1, 0x0004, 
0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } }, + { 0xd80ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x70110100, 0x3800, 0, 0x2c00, 1, 1, 0x0004, 0x1203, 0xffff, 0x3600, 0xc9e2, 0x2e00 } }, + { 0x3c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x80380100, 0x2000, 0, 0x1e00, 2, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } }, + { 0x6c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x905f0100, 0x2400, 0, 0x1e00, 2, 1, 0x0004, 0x8901, 0xffff, 0x2300, 0x314c, 0x1d00 } }, + { 0xa00fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0xa0860100, 0x2800, 0, 0x2000, 2, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } } }; static const SMU74_Discrete_MemoryLevel avfs_memory_level_polaris10 = - {0x50140000, 0x50140000, 0x00320000, 0x00, 0x00, - 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x0000, 0x00, 0x00}; + {0x100ea446, 0, 0x30750000, 0x01, 0x01, 0x01, 0x00, 0x00, 0x64, 0x00, 0x00, 0x1f00, 0x00, 0x00}; /** * Set the address for reading/writing the SMC SRAM space. @@ -219,6 +218,18 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr) && (0x20100 <= cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMC_PC_C))); } +static bool polaris10_is_hw_avfs_present(struct pp_smumgr *smumgr) +{ + uint32_t efuse; + + efuse = cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMU_EFUSE_0 + (49*4)); + efuse &= 0x00000001; + if (efuse) + return true; + + return false; +} + /** * Send a message to the SMC, and wait for its response. * @@ -228,21 +239,27 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr) */ int polaris10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg) { + int ret; + if (!polaris10_is_smc_ram_running(smumgr)) return -1; + SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0); - if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP)) - printk("Failed to send Previous Message.\n"); + ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP); + if (ret != 1) + printk("\n failed to send pre message %x ret is %d \n", msg, ret); cgs_write_register(smumgr->device, mmSMC_MESSAGE_0, msg); SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0); - if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP)) - printk("Failed to send Message.\n"); + ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP); + + if (ret != 1) + printk("\n failed to send message %x ret is %d \n", msg, ret); return 0; } @@ -953,6 +970,11 @@ static int polaris10_smu_init(struct pp_smumgr *smumgr) (cgs_handle_t)smu_data->smu_buffer.handle); return -1;); + if (polaris10_is_hw_avfs_present(smumgr)) + smu_data->avfs.avfs_btc_status = AVFS_BTC_BOOT; + else + smu_data->avfs.avfs_btc_status = AVFS_BTC_NOTSUPPORTED; + return 0; } diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c index 39802c0539b6..3d34fc4ca826 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c @@ -266,9 +266,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev) if (!ret) ret = atmel_hlcdc_check_endpoint(dev, &ep); - of_node_put(ep_np); - if (ret) + if (ret) { + of_node_put(ep_np); return ret; + } } for_each_endpoint_of_node(dev->dev->of_node, ep_np) { @@ -276,9 +277,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev) if (!ret) ret = atmel_hlcdc_attach_endpoint(dev, &ep); - of_node_put(ep_np); - if (ret) + if (ret) { + 
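/*
 * for_each_endpoint_of_node() drops the reference on ep_np itself when
 * it advances to the next endpoint and when the loop terminates, so an
 * explicit of_node_put() is needed only on this early-return path; the
 * old unconditional put over-dropped the refcount on every iteration.
 */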
of_node_put(ep_np); return ret; + } } return 0; diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index aef3ca8a81fa..016c191221f3 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -339,6 +339,8 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane, atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff, factor_reg); + } else { + atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff, 0); } } diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index c204ef32df16..9bb99e274d23 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1296,14 +1296,39 @@ EXPORT_SYMBOL(drm_atomic_add_affected_planes); */ void drm_atomic_legacy_backoff(struct drm_atomic_state *state) { + struct drm_device *dev = state->dev; + unsigned crtc_mask = 0; + struct drm_crtc *crtc; int ret; + bool global = false; + + drm_for_each_crtc(crtc, dev) { + if (crtc->acquire_ctx != state->acquire_ctx) + continue; + + crtc_mask |= drm_crtc_mask(crtc); + crtc->acquire_ctx = NULL; + } + + if (WARN_ON(dev->mode_config.acquire_ctx == state->acquire_ctx)) { + global = true; + + dev->mode_config.acquire_ctx = NULL; + } retry: drm_modeset_backoff(state->acquire_ctx); - ret = drm_modeset_lock_all_ctx(state->dev, state->acquire_ctx); + ret = drm_modeset_lock_all_ctx(dev, state->acquire_ctx); if (ret) goto retry; + + drm_for_each_crtc(crtc, dev) + if (drm_crtc_mask(crtc) & crtc_mask) + crtc->acquire_ctx = state->acquire_ctx; + + if (global) + dev->mode_config.acquire_ctx = state->acquire_ctx; } EXPORT_SYMBOL(drm_atomic_legacy_backoff); diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index f6223f907c15..7f9901b7777b 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -31,7 +31,6 @@ #include "exynos_drm_plane.h" #include "exynos_drm_drv.h" #include "exynos_drm_fb.h" -#include "exynos_drm_fbdev.h" #include "exynos_drm_iommu.h" /* diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c index 468498e3fec1..4c1fb3f8b5a6 100644 --- a/drivers/gpu/drm/exynos/exynos_dp.c +++ b/drivers/gpu/drm/exynos/exynos_dp.c @@ -34,7 +34,7 @@ struct exynos_dp_device { struct drm_encoder encoder; - struct drm_connector connector; + struct drm_connector *connector; struct drm_bridge *ptn_bridge; struct drm_device *drm_dev; struct device *dev; @@ -70,7 +70,7 @@ static int exynos_dp_poweroff(struct analogix_dp_plat_data *plat_data) static int exynos_dp_get_modes(struct analogix_dp_plat_data *plat_data) { struct exynos_dp_device *dp = to_dp(plat_data); - struct drm_connector *connector = &dp->connector; + struct drm_connector *connector = dp->connector; struct drm_display_mode *mode; int num_modes = 0; @@ -103,6 +103,7 @@ static int exynos_dp_bridge_attach(struct analogix_dp_plat_data *plat_data, int ret; drm_connector_register(connector); + dp->connector = connector; /* Pre-empt DP connector creation if there's a bridge */ if (dp->ptn_bridge) { diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c index 011211e4167d..edbd98ff293e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_core.c +++ b/drivers/gpu/drm/exynos/exynos_drm_core.c @@ -15,7 +15,6 @@ #include #include "exynos_drm_drv.h" #include "exynos_drm_crtc.h" -#include "exynos_drm_fbdev.h" static LIST_HEAD(exynos_drm_subdrv_list); diff 
--git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 3efe1aa89416..d47216488985 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -30,7 +30,6 @@ #include "exynos_drm_drv.h" #include "exynos_drm_fb.h" -#include "exynos_drm_fbdev.h" #include "exynos_drm_crtc.h" #include "exynos_drm_plane.h" #include "exynos_drm_iommu.h" @@ -120,7 +119,6 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = { .timing_base = 0x0, .has_clksel = 1, .has_limited_fmt = 1, - .has_hw_trigger = 1, }; static struct fimd_driver_data exynos3_fimd_driver_data = { @@ -171,14 +169,11 @@ static struct fimd_driver_data exynos5420_fimd_driver_data = { .lcdblk_vt_shift = 24, .lcdblk_bypass_shift = 15, .lcdblk_mic_bypass_shift = 11, - .trg_type = I80_HW_TRG, .has_shadowcon = 1, .has_vidoutcon = 1, .has_vtsel = 1, .has_mic_bypass = 1, .has_dp_clk = 1, - .has_hw_trigger = 1, - .has_trigger_per_te = 1, }; struct fimd_context { diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 493552368295..8564c3da0d22 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -48,13 +48,13 @@ /* registers for base address */ #define G2D_SRC_BASE_ADDR 0x0304 -#define G2D_SRC_STRIDE_REG 0x0308 +#define G2D_SRC_STRIDE 0x0308 #define G2D_SRC_COLOR_MODE 0x030C #define G2D_SRC_LEFT_TOP 0x0310 #define G2D_SRC_RIGHT_BOTTOM 0x0314 #define G2D_SRC_PLANE2_BASE_ADDR 0x0318 #define G2D_DST_BASE_ADDR 0x0404 -#define G2D_DST_STRIDE_REG 0x0408 +#define G2D_DST_STRIDE 0x0408 #define G2D_DST_COLOR_MODE 0x040C #define G2D_DST_LEFT_TOP 0x0410 #define G2D_DST_RIGHT_BOTTOM 0x0414 @@ -563,7 +563,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset) switch (reg_offset) { case G2D_SRC_BASE_ADDR: - case G2D_SRC_STRIDE_REG: + case G2D_SRC_STRIDE: case G2D_SRC_COLOR_MODE: case G2D_SRC_LEFT_TOP: case G2D_SRC_RIGHT_BOTTOM: @@ -573,7 +573,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset) reg_type = REG_TYPE_SRC_PLANE2; break; case G2D_DST_BASE_ADDR: - case G2D_DST_STRIDE_REG: + case G2D_DST_STRIDE: case G2D_DST_COLOR_MODE: case G2D_DST_LEFT_TOP: case G2D_DST_RIGHT_BOTTOM: @@ -968,8 +968,8 @@ static int g2d_check_reg_offset(struct device *dev, } else buf_info->types[reg_type] = BUF_TYPE_GEM; break; - case G2D_SRC_STRIDE_REG: - case G2D_DST_STRIDE_REG: + case G2D_SRC_STRIDE: + case G2D_DST_STRIDE: if (for_addr) goto err; diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c index 55f1d37c666a..77f12c00abf9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_plane.c +++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c @@ -242,7 +242,7 @@ exynos_drm_plane_check_size(const struct exynos_drm_plane_config *config, state->v_ratio == (1 << 15)) height_ok = true; - if (width_ok & height_ok) + if (width_ok && height_ok) return 0; DRM_DEBUG_KMS("scaling mode is not supported"); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 32690332d441..103546834b60 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2365,16 +2365,16 @@ static int i915_ppgtt_info(struct seq_file *m, void *data) task = get_pid_task(file->pid, PIDTYPE_PID); if (!task) { ret = -ESRCH; - goto out_put; + goto out_unlock; } seq_printf(m, "\nproc: %s\n", task->comm); put_task_struct(task); idr_for_each(&file_priv->context_idr, per_file_ctx, (void *)(unsigned long)m); } +out_unlock: 
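/*
 * Moving the label above this unlock fixes the error path: the old
 * "goto out_put" jumped past mutex_unlock(&dev->filelist_mutex), so a
 * failed get_pid_task() returned with filelist_mutex still held.
 */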
mutex_unlock(&dev->filelist_mutex); -out_put: intel_runtime_pm_put(dev_priv); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f313b4d8344f..85c4debf47e0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -512,6 +512,10 @@ void intel_detect_pch(struct drm_device *dev) DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n"); WARN_ON(!IS_SKYLAKE(dev) && !IS_KABYLAKE(dev)); + } else if (id == INTEL_PCH_KBP_DEVICE_ID_TYPE) { + dev_priv->pch_type = PCH_KBP; + DRM_DEBUG_KMS("Found KabyPoint PCH\n"); + WARN_ON(!IS_KABYLAKE(dev)); } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) || (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) || ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) && diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7c334e902266..bc3f2e6842e7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -990,6 +990,7 @@ enum intel_pch { PCH_CPT, /* Cougarpoint PCH */ PCH_LPT, /* Lynxpoint PCH */ PCH_SPT, /* Sunrisepoint PCH */ + PCH_KBP, /* Kabypoint PCH */ PCH_NOP, }; @@ -2600,6 +2601,15 @@ struct drm_i915_cmd_table { #define IS_BXT_REVID(p, since, until) (IS_BROXTON(p) && IS_REVID(p, since, until)) +#define KBL_REVID_A0 0x0 +#define KBL_REVID_B0 0x1 +#define KBL_REVID_C0 0x2 +#define KBL_REVID_D0 0x3 +#define KBL_REVID_E0 0x4 + +#define IS_KBL_REVID(p, since, until) \ + (IS_KABYLAKE(p) && IS_REVID(p, since, until)) + /* * The genX designation typically refers to the render engine, so render * capability related checks should use IS_GEN, while display and other checks @@ -2708,11 +2718,13 @@ struct drm_i915_cmd_table { #define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE 0x9c00 #define INTEL_PCH_SPT_DEVICE_ID_TYPE 0xA100 #define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 +#define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA200 #define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 #define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000 #define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ #define INTEL_PCH_TYPE(dev) (__I915__(dev)->pch_type) +#define HAS_PCH_KBP(dev) (INTEL_PCH_TYPE(dev) == PCH_KBP) #define HAS_PCH_SPT(dev) (INTEL_PCH_TYPE(dev) == PCH_SPT) #define HAS_PCH_LPT(dev) (INTEL_PCH_TYPE(dev) == PCH_LPT) #define HAS_PCH_LPT_LP(dev) (__I915__(dev)->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 425e721aac58..66571466e9a8 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -40,7 +40,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) if (!mutex_is_locked(mutex)) return false; -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) +#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) return mutex->owner == task; #else /* Since UP may be pre-empted, we cannot assume that we own the lock */ diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index b7ce963fb8f8..44004e3f09e4 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -55,8 +55,10 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, return -ENODEV; /* See the comment at the drm_mm_init() call for more about this check. 
- * WaSkipStolenMemoryFirstPage:bdw,chv (incomplete) */ - if (INTEL_INFO(dev_priv)->gen == 8 && start < 4096) + * WaSkipStolenMemoryFirstPage:bdw,chv,kbl (incomplete) + */ + if (start < 4096 && (IS_GEN8(dev_priv) || + IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0))) start = 4096; mutex_lock(&dev_priv->mm.stolen_lock); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 2f6fd33c07ba..aab47f7bb61b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2471,7 +2471,7 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) I915_WRITE(SDEIIR, iir); ret = IRQ_HANDLED; - if (HAS_PCH_SPT(dev_priv)) + if (HAS_PCH_SPT(dev_priv) || HAS_PCH_KBP(dev_priv)) spt_irq_handler(dev, iir); else cpt_irq_handler(dev, iir); @@ -4661,7 +4661,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev->driver->disable_vblank = gen8_disable_vblank; if (IS_BROXTON(dev)) dev_priv->display.hpd_irq_setup = bxt_hpd_irq_setup; - else if (HAS_PCH_SPT(dev)) + else if (HAS_PCH_SPT(dev) || HAS_PCH_KBP(dev)) dev_priv->display.hpd_irq_setup = spt_hpd_irq_setup; else dev_priv->display.hpd_irq_setup = ilk_hpd_irq_setup; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b407411e31ba..3fcf7dd5b6ca 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -220,6 +220,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ECOCHK_PPGTT_WT_HSW (0x2<<3) #define ECOCHK_PPGTT_WB_HSW (0x3<<3) +#define GEN8_CONFIG0 _MMIO(0xD00) +#define GEN9_DEFAULT_FIXES (1 << 3 | 1 << 2 | 1 << 1) + #define GAC_ECO_BITS _MMIO(0x14090) #define ECOBITS_SNB_BIT (1<<13) #define ECOBITS_PPGTT_CACHE64B (3<<8) @@ -1669,6 +1672,9 @@ enum skl_disp_power_wells { #define GEN7_TLB_RD_ADDR _MMIO(0x4700) +#define GAMT_CHKN_BIT_REG _MMIO(0x4ab8) +#define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28) + #if 0 #define PRB0_TAIL _MMIO(0x2030) #define PRB0_HEAD _MMIO(0x2034) @@ -1804,6 +1810,10 @@ enum skl_disp_power_wells { #define GEN9_IZ_HASHING_MASK(slice) (0x3 << ((slice) * 2)) #define GEN9_IZ_HASHING(slice, val) ((val) << ((slice) * 2)) +/* chicken reg for WaConextSwitchWithConcurrentTLBInvalidate */ +#define GEN9_CSFE_CHICKEN1_RCS _MMIO(0x20D4) +#define GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2) + /* WaClearTdlStateAckDirtyBits */ #define GEN8_STATE_ACK _MMIO(0x20F0) #define GEN9_STATE_ACK_SLICE1 _MMIO(0x20F8) @@ -2200,6 +2210,8 @@ enum skl_disp_power_wells { #define ILK_DPFC_STATUS _MMIO(0x43210) #define ILK_DPFC_FENCE_YOFF _MMIO(0x43218) #define ILK_DPFC_CHICKEN _MMIO(0x43224) +#define ILK_DPFC_DISABLE_DUMMY0 (1<<8) +#define ILK_DPFC_NUKE_ON_ANY_MODIFICATION (1<<23) #define ILK_FBC_RT_BASE _MMIO(0x2128) #define ILK_FBC_RT_VALID (1<<0) #define SNB_FBC_FRONT_BUFFER (1<<1) @@ -6031,6 +6043,7 @@ enum skl_disp_power_wells { #define CHICKEN_PAR1_1 _MMIO(0x42080) #define DPA_MASK_VBLANK_SRD (1 << 15) #define FORCE_ARB_IDLE_PLANES (1 << 14) +#define SKL_EDP_PSR_FIX_RDWRAP (1 << 3) #define _CHICKEN_PIPESL_1_A 0x420b0 #define _CHICKEN_PIPESL_1_B 0x420b4 @@ -6039,6 +6052,7 @@ enum skl_disp_power_wells { #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B) #define DISP_ARB_CTL _MMIO(0x45000) +#define DISP_FBC_MEMORY_WAKE (1<<31) #define DISP_TILE_SURFACE_SWIZZLING (1<<13) #define DISP_FBC_WM_DIS (1<<15) #define DISP_ARB_CTL2 _MMIO(0x45004) @@ -6052,6 +6066,9 @@ enum skl_disp_power_wells { #define HSW_NDE_RSTWRN_OPT _MMIO(0x46408) #define RESET_PCH_HANDSHAKE_ENABLE (1<<4) +#define 
GEN8_CHICKEN_DCPR_1 _MMIO(0x46430) +#define MASK_WAKEMEM (1<<13) + #define SKL_DFSM _MMIO(0x51000) #define SKL_DFSM_CDCLK_LIMIT_MASK (3 << 23) #define SKL_DFSM_CDCLK_LIMIT_675 (0 << 23) @@ -6069,6 +6086,7 @@ enum skl_disp_power_wells { #define GEN9_TSG_BARRIER_ACK_DISABLE (1<<8) #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) +#define GEN9_CTX_PREEMPT_REG _MMIO(0x2248) #define GEN8_CS_CHICKEN1 _MMIO(0x2580) /* GEN7 chicken */ @@ -6076,6 +6094,7 @@ enum skl_disp_power_wells { # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1<<10) | (1<<26)) # define GEN9_RHWO_OPTIMIZATION_DISABLE (1<<14) #define COMMON_SLICE_CHICKEN2 _MMIO(0x7014) +# define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1<<8) # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1<<0) #define HIZ_CHICKEN _MMIO(0x7018) @@ -6921,6 +6940,7 @@ enum skl_disp_power_wells { #define EDRAM_SETS_IDX(cap) (((cap) >> 8) & 0x3) #define GEN6_UCGCTL1 _MMIO(0x9400) +# define GEN6_GAMUNIT_CLOCK_GATE_DISABLE (1 << 22) # define GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE (1 << 16) # define GEN6_BLBUNIT_CLOCK_GATE_DISABLE (1 << 5) # define GEN6_CSUNIT_CLOCK_GATE_DISABLE (1 << 7) @@ -6937,6 +6957,7 @@ enum skl_disp_power_wells { #define GEN7_UCGCTL4 _MMIO(0x940c) #define GEN7_L3BANK2X_CLOCK_GATE_DISABLE (1<<25) +#define GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE (1<<14) #define GEN6_RCGCTL1 _MMIO(0x9410) #define GEN6_RCGCTL2 _MMIO(0x9414) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index a34c23eceba0..2b3b428d9cd2 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -41,16 +41,22 @@ * be moved to FW_FAILED. */ +#define I915_CSR_KBL "i915/kbl_dmc_ver1.bin" +MODULE_FIRMWARE(I915_CSR_KBL); +#define KBL_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) + #define I915_CSR_SKL "i915/skl_dmc_ver1.bin" +MODULE_FIRMWARE(I915_CSR_SKL); +#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 23) + #define I915_CSR_BXT "i915/bxt_dmc_ver1.bin" +MODULE_FIRMWARE(I915_CSR_BXT); +#define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) #define FIRMWARE_URL "https://01.org/linuxgraphics/intel-linux-graphics-firmwares" -MODULE_FIRMWARE(I915_CSR_SKL); -MODULE_FIRMWARE(I915_CSR_BXT); -#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 23) -#define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) + #define CSR_MAX_FW_SIZE 0x2FFF #define CSR_DEFAULT_FW_OFFSET 0xFFFFFFFF @@ -169,12 +175,10 @@ struct stepping_info { char substepping; }; -/* - * Kabylake derivated from Skylake H0, so SKL H0 - * is the right firmware for KBL A0 (revid 0). 
- */ static const struct stepping_info kbl_stepping_info[] = { - {'H', '0'}, {'I', '0'} + {'A', '0'}, {'B', '0'}, {'C', '0'}, + {'D', '0'}, {'E', '0'}, {'F', '0'}, + {'G', '0'}, {'H', '0'}, {'I', '0'}, }; static const struct stepping_info skl_stepping_info[] = { @@ -298,7 +302,9 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, csr->version = css_header->version; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_KABYLAKE(dev_priv)) { + required_min_version = KBL_CSR_VERSION_REQUIRED; + } else if (IS_SKYLAKE(dev_priv)) { required_min_version = SKL_CSR_VERSION_REQUIRED; } else if (IS_BROXTON(dev_priv)) { required_min_version = BXT_CSR_VERSION_REQUIRED; @@ -446,7 +452,9 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) if (!HAS_CSR(dev_priv)) return; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_KABYLAKE(dev_priv)) + csr->fw_path = I915_CSR_KBL; + else if (IS_SKYLAKE(dev_priv)) csr->fw_path = I915_CSR_SKL; else if (IS_BROXTON(dev_priv)) csr->fw_path = I915_CSR_BXT; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 56a1637c864f..3074c56a643d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8447,16 +8447,16 @@ static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv) tmp |= FDI_MPHY_IOSFSB_RESET_CTL; I915_WRITE(SOUTH_CHICKEN2, tmp); - if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) & - FDI_MPHY_IOSFSB_RESET_STATUS, 100)) + if (wait_for_us(I915_READ(SOUTH_CHICKEN2) & + FDI_MPHY_IOSFSB_RESET_STATUS, 100)) DRM_ERROR("FDI mPHY reset assert timeout\n"); tmp = I915_READ(SOUTH_CHICKEN2); tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL; I915_WRITE(SOUTH_CHICKEN2, tmp); - if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) & - FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100)) + if (wait_for_us((I915_READ(SOUTH_CHICKEN2) & + FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100)) DRM_ERROR("FDI mPHY reset de-assert timeout\n"); } @@ -9440,8 +9440,8 @@ static void hsw_disable_lcpll(struct drm_i915_private *dev_priv, val |= LCPLL_CD_SOURCE_FCLK; I915_WRITE(LCPLL_CTL, val); - if (wait_for_atomic_us(I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 1)) + if (wait_for_us(I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE, 1)) DRM_ERROR("Switching to FCLK failed\n"); val = I915_READ(LCPLL_CTL); @@ -9514,8 +9514,8 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) val &= ~LCPLL_CD_SOURCE_FCLK; I915_WRITE(LCPLL_CTL, val); - if (wait_for_atomic_us((I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) + if (wait_for_us((I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) DRM_ERROR("Switching back to LCPLL failed\n"); } @@ -11997,6 +11997,12 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, ret = intel_color_check(crtc, crtc_state); if (ret) return ret; + + /* + * Changing color management on Intel hardware is + * handled as part of planes update. 
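 * Setting planes_changed below therefore forces a
 * color-management-only commit through the plane update path;
 * otherwise the new gamma/degamma LUTs would never be written.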
+ */ + crtc_state->planes_changed = true; } ret = 0; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index ffe5f8430957..891107f92d9f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -663,7 +663,7 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, msecs_to_jiffies_timeout(10)); else - done = wait_for_atomic(C, 10) == 0; + done = wait_for(C, 10) == 0; if (!done) DRM_ERROR("dp aux hw did not signal timeout (has irq: %i)!\n", has_aux_irq); @@ -4645,7 +4645,7 @@ intel_dp_detect(struct drm_connector *connector, bool force) intel_dp->detect_done = false; - if (intel_connector->detect_edid) + if (is_edp(intel_dp) || intel_connector->detect_edid) return connector_status_connected; else return connector_status_disconnected; @@ -4899,13 +4899,15 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) void intel_dp_encoder_reset(struct drm_encoder *encoder) { - struct intel_dp *intel_dp; + struct drm_i915_private *dev_priv = to_i915(encoder->dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + if (!HAS_DDI(dev_priv)) + intel_dp->DP = I915_READ(intel_dp->output_reg); if (to_intel_encoder(encoder)->type != INTEL_OUTPUT_EDP) return; - intel_dp = enc_to_intel_dp(encoder); - pps_lock(intel_dp); /* @@ -4977,9 +4979,6 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) intel_display_power_get(dev_priv, power_domain); if (long_hpd) { - /* indicate that we need to restart link training */ - intel_dp->train_set_valid = false; - intel_dp_long_pulse(intel_dp->attached_connector); if (intel_dp->is_mst) ret = IRQ_HANDLED; diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index 0b8eefc2acc5..60fb39cd220b 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -85,8 +85,7 @@ static bool intel_dp_reset_link_train(struct intel_dp *intel_dp, uint8_t dp_train_pat) { - if (!intel_dp->train_set_valid) - memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set)); + memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set)); intel_dp_set_signal_levels(intel_dp); return intel_dp_set_link_train(intel_dp, dp_train_pat); } @@ -161,23 +160,6 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) break; } - /* - * if we used previously trained voltage and pre-emphasis values - * and we don't get clock recovery, reset link training values - */ - if (intel_dp->train_set_valid) { - DRM_DEBUG_KMS("clock recovery not ok, reset"); - /* clear the flag as we are not reusing train set */ - intel_dp->train_set_valid = false; - if (!intel_dp_reset_link_train(intel_dp, - DP_TRAINING_PATTERN_1 | - DP_LINK_SCRAMBLING_DISABLE)) { - DRM_ERROR("failed to enable link training\n"); - return; - } - continue; - } - /* Check to see if we've tried the max voltage */ for (i = 0; i < intel_dp->lane_count; i++) if ((intel_dp->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0) @@ -284,7 +266,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) /* Make sure clock is still ok */ if (!drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) { - intel_dp->train_set_valid = false; intel_dp_link_training_clock_recovery(intel_dp); intel_dp_set_link_train(intel_dp, training_pattern | @@ -301,7 +282,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) /* Try 5 times, then try clock recovery if 
that fails */ if (tries > 5) { - intel_dp->train_set_valid = false; intel_dp_link_training_clock_recovery(intel_dp); intel_dp_set_link_train(intel_dp, training_pattern | @@ -322,10 +302,8 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) intel_dp_set_idle_link_train(intel_dp); - if (channel_eq) { - intel_dp->train_set_valid = true; + if (channel_eq) DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n"); - } } void intel_dp_stop_link_train(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index baf6f5584cbd..58f60b27837e 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1377,8 +1377,8 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, I915_WRITE(BXT_PORT_PLL_ENABLE(port), temp); POSTING_READ(BXT_PORT_PLL_ENABLE(port)); - if (wait_for_atomic_us((I915_READ(BXT_PORT_PLL_ENABLE(port)) & - PORT_PLL_LOCK), 200)) + if (wait_for_us((I915_READ(BXT_PORT_PLL_ENABLE(port)) & PORT_PLL_LOCK), + 200)) DRM_ERROR("PLL %d not locked\n", port); /* diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 4a24b0067a3a..f7f0f01814f6 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -863,8 +863,6 @@ struct intel_dp { /* This is called before a link training is starterd */ void (*prepare_link_retrain)(struct intel_dp *intel_dp); - bool train_set_valid; - /* Displayport compliance testing */ unsigned long compliance_test_type; unsigned long compliance_test_data; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index d5a7cfec589b..647127f3aaff 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -824,8 +824,7 @@ static bool intel_fbc_can_choose(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; struct intel_fbc *fbc = &dev_priv->fbc; - bool enable_by_default = IS_HASWELL(dev_priv) || - IS_BROADWELL(dev_priv); + bool enable_by_default = IS_BROADWELL(dev_priv); if (intel_vgpu_active(dev_priv->dev)) { fbc->no_fbc_reason = "VGPU is active"; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 42eac37de047..7f2d8415ed8b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1103,15 +1103,17 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, uint32_t *const batch, uint32_t index) { + struct drm_i915_private *dev_priv = engine->dev->dev_private; uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES); /* - * WaDisableLSQCROPERFforOCL:skl + * WaDisableLSQCROPERFforOCL:skl,kbl * This WA is implemented in skl_init_clock_gating() but since * this batch updates GEN8_L3SQCREG4 with default value we need to * set this bit here to retain the WA during flush. 
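 * Extending the revid check to KBL_REVID_E0 keeps this flush-time copy
 * of GEN8_L3SQCREG4 in step with the kbl_init_workarounds() half of the
 * same workaround added later in this diff.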
*/ - if (IS_SKL_REVID(engine->dev, 0, SKL_REVID_E0)) + if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0) || + IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS; wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | @@ -1273,6 +1275,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, { int ret; struct drm_device *dev = engine->dev; + struct drm_i915_private *dev_priv = dev->dev_private; uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); /* WaDisableCtxRestoreArbitration:skl,bxt */ @@ -1286,6 +1289,22 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, return ret; index = ret; + /* WaClearSlmSpaceAtContextSwitch:kbl */ + /* Actual scratch location is at 128 bytes offset */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { + uint32_t scratch_addr + = engine->scratch.gtt_offset + 2*CACHELINE_BYTES; + + wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); + wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE)); + wa_ctx_emit(batch, index, scratch_addr); + wa_ctx_emit(batch, index, 0); + wa_ctx_emit(batch, index, 0); + wa_ctx_emit(batch, index, 0); + } /* Pad to end of cacheline */ while (index % CACHELINE_DWORDS) wa_ctx_emit(batch, index, MI_NOOP); @@ -1687,9 +1706,10 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, struct intel_ringbuffer *ringbuf = request->ringbuf; struct intel_engine_cs *engine = ringbuf->engine; u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; - bool vf_flush_wa = false; + bool vf_flush_wa = false, dc_flush_wa = false; u32 flags = 0; int ret; + int len; flags |= PIPE_CONTROL_CS_STALL; @@ -1716,9 +1736,21 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, */ if (IS_GEN9(engine->dev)) vf_flush_wa = true; + + /* WaForGAMHang:kbl */ + if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0)) + dc_flush_wa = true; } - ret = intel_ring_begin(request, vf_flush_wa ? 
12 : 6); + len = 6; + + if (vf_flush_wa) + len += 6; + + if (dc_flush_wa) + len += 12; + + ret = intel_ring_begin(request, len); if (ret) return ret; @@ -1731,12 +1763,31 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, intel_logical_ring_emit(ringbuf, 0); } + if (dc_flush_wa) { + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); + intel_logical_ring_emit(ringbuf, PIPE_CONTROL_DC_FLUSH_ENABLE); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + } + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); intel_logical_ring_emit(ringbuf, flags); intel_logical_ring_emit(ringbuf, scratch_addr); intel_logical_ring_emit(ringbuf, 0); intel_logical_ring_emit(ringbuf, 0); intel_logical_ring_emit(ringbuf, 0); + + if (dc_flush_wa) { + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); + intel_logical_ring_emit(ringbuf, PIPE_CONTROL_CS_STALL); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + } + intel_logical_ring_advance(ringbuf); return 0; diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 99e26034ae8d..16e209d326b6 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -1038,5 +1038,16 @@ intel_opregion_get_panel_type(struct drm_device *dev) return -ENODEV; } + /* + * FIXME On Dell XPS 13 9350 the OpRegion panel type (0) gives us + * low vswing for eDP, whereas the VBT panel type (2) gives us normal + * vswing instead. Low vswing results in some display flickers, so + * let's simply ignore the OpRegion panel type on SKL for now. 
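 * Returning -ENODEV here makes the caller fall back to the VBT
 * panel type, which is what we want on these machines.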
+ */ + if (IS_SKYLAKE(dev)) { + DRM_DEBUG_KMS("Ignoring OpRegion panel type (%d)\n", ret - 1); + return -ENODEV; + } + return ret - 1; } diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 8357d571553a..aba94099886b 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1731,7 +1731,8 @@ intel_panel_init_backlight_funcs(struct intel_panel *panel) panel->backlight.set = bxt_set_backlight; panel->backlight.get = bxt_get_backlight; panel->backlight.hz_to_pwm = bxt_hz_to_pwm; - } else if (HAS_PCH_LPT(dev_priv) || HAS_PCH_SPT(dev_priv)) { + } else if (HAS_PCH_LPT(dev_priv) || HAS_PCH_SPT(dev_priv) || + HAS_PCH_KBP(dev_priv)) { panel->backlight.setup = lpt_setup_backlight; panel->backlight.enable = lpt_enable_backlight; panel->backlight.disable = lpt_disable_backlight; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a7ef45da0a9e..2863b92c9da6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -54,10 +54,38 @@ #define INTEL_RC6p_ENABLE (1<<1) #define INTEL_RC6pp_ENABLE (1<<2) +static void gen9_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP); + + I915_WRITE(GEN8_CONFIG0, + I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES); + + /* WaEnableChickenDCPR:skl,bxt,kbl */ + I915_WRITE(GEN8_CHICKEN_DCPR_1, + I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); + + /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */ + /* WaFbcWakeMemOn:skl,bxt,kbl */ + I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | + DISP_FBC_WM_DIS | + DISP_FBC_MEMORY_WAKE); + + /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl */ + I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | + ILK_DPFC_DISABLE_DUMMY0); +} + static void bxt_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + gen9_init_clock_gating(dev); + /* WaDisableSDEUnitClockGating:bxt */ I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); @@ -6698,6 +6726,38 @@ static void lpt_suspend_hw(struct drm_device *dev) } } +static void kabylake_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + gen9_init_clock_gating(dev); + + /* WaDisableSDEUnitClockGating:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | + GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + + /* WaDisableGamClockGating:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | + GEN6_GAMUNIT_CLOCK_GATE_DISABLE); + + /* WaFbcNukeOnHostModify:kbl */ + I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | + ILK_DPFC_NUKE_ON_ANY_MODIFICATION); +} + +static void skylake_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + gen9_init_clock_gating(dev); + + /* WaFbcNukeOnHostModify:skl */ + I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | + ILK_DPFC_NUKE_ON_ANY_MODIFICATION); +} + static void broadwell_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -7163,9 +7223,9 @@ static void nop_init_clock_gating(struct drm_device *dev) void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { if (IS_SKYLAKE(dev_priv)) - 
dev_priv->display.init_clock_gating = nop_init_clock_gating; + dev_priv->display.init_clock_gating = skylake_init_clock_gating; else if (IS_KABYLAKE(dev_priv)) - dev_priv->display.init_clock_gating = nop_init_clock_gating; + dev_priv->display.init_clock_gating = kabylake_init_clock_gating; else if (IS_BROXTON(dev_priv)) dev_priv->display.init_clock_gating = bxt_init_clock_gating; else if (IS_BROADWELL(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 04402bb9d26b..68c5af079ef8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -913,24 +913,26 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) { struct drm_device *dev = engine->dev; struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t tmp; int ret; - /* WaEnableLbsSlaRetryTimerDecrement:skl */ + /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl */ + I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); + + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl */ I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); - /* WaDisableKillLogic:bxt,skl */ + /* WaDisableKillLogic:bxt,skl,kbl */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | ECOCHK_DIS_TLB); - /* WaClearFlowControlGpgpuContextSave:skl,bxt */ - /* WaDisablePartialInstShootdown:skl,bxt */ + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl */ + /* WaDisablePartialInstShootdown:skl,bxt,kbl */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, FLOW_CONTROL_ENABLE | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - /* Syncing dependencies between camera and graphics:skl,bxt */ + /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); @@ -952,18 +954,18 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) */ } - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */ - /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt */ + /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl */ + /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, GEN9_ENABLE_YV12_BUGFIX | GEN9_ENABLE_GPGPU_PREEMPTION); - /* Wa4x4STCOptimizationDisable:skl,bxt */ - /* WaDisablePartialResolveInVc:skl,bxt */ + /* Wa4x4STCOptimizationDisable:skl,bxt,kbl */ + /* WaDisablePartialResolveInVc:skl,bxt,kbl */ WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); - /* WaCcsTlbPrefetchDisable:skl,bxt */ + /* WaCcsTlbPrefetchDisable:skl,bxt,kbl */ WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, GEN9_CCS_TLB_PREFETCH_ENABLE); @@ -973,31 +975,57 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, PIXEL_MASK_CAMMING_DISABLE); - /* WaForceContextSaveRestoreNonCoherent:skl,bxt */ - tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT; - if (IS_SKL_REVID(dev, SKL_REVID_F0, REVID_FOREVER) || - IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER)) - tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE; - WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp); + /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); + + /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are + * both tied to WaForceContextSaveRestoreNonCoherent + * in some hsds for skl. We keep the tie for all gen9. 
The + * documentation is a bit hazy and so we want to get common behaviour, + * even though there is no clear evidence we would need both on kbl/bxt. + * This area has been source of system hangs so we play it safe + * and mimic the skl regardless of what bspec says. + * + * Use Force Non-Coherent whenever executing a 3D context. This + * is a workaround for a possible hang in the unlikely event + * a TLB invalidation occurs during a PSD flush. + */ - /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */ - if (IS_SKYLAKE(dev) || IS_BXT_REVID(dev, 0, BXT_REVID_B0)) + /* WaForceEnableNonCoherent:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT); + + /* WaDisableHDCInvalidation:skl,bxt,kbl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + BDW_DISABLE_HDC_INVALIDATION); + + /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ + if (IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); - /* WaDisableSTUnitPowerOptimization:skl,bxt */ + /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); - /* WaOCLCoherentLineFlush:skl,bxt */ + /* WaOCLCoherentLineFlush:skl,bxt,kbl */ I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES)); - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt */ + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt */ + ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); + if (ret) + return ret; + + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); if (ret) return ret; - /* WaAllowUMDToModifyHDCChicken1:skl,bxt */ + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl */ ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); if (ret) return ret; @@ -1092,22 +1120,6 @@ static int skl_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(HIZ_CHICKEN, BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE); - /* This is tied to WaForceContextSaveRestoreNonCoherent */ - if (IS_SKL_REVID(dev, 0, REVID_FOREVER)) { - /* - *Use Force Non-Coherent whenever executing a 3D context. This - * is a workaround for a possible hang in the unlikely event - * a TLB invalidation occurs during a PSD flush. 
- */ - /* WaForceEnableNonCoherent:skl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT); - - /* WaDisableHDCInvalidation:skl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - BDW_DISABLE_HDC_INVALIDATION); - } - /* WaBarrierPerformanceFixDisable:skl */ if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0)) WA_SET_BIT_MASKED(HDC_CHICKEN0, @@ -1120,6 +1132,9 @@ static int skl_init_workarounds(struct intel_engine_cs *engine) GEN7_HALF_SLICE_CHICKEN1, GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + /* WaDisableGafsUnitClkGating:skl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + /* WaDisableLSQCROPERFforOCL:skl */ ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); if (ret) @@ -1174,6 +1189,63 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) return ret; } + /* WaInsertDummyPushConstPs:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + return 0; +} + +static int kbl_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->dev->dev_private; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaEnableGapsTsvCreditFix:kbl */ + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + + /* WaDisableDynamicCreditSharing:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + WA_SET_BIT(GAMT_CHKN_BIT_REG, + GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); + + /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FENCE_DEST_SLM_DISABLE); + + /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes + * involving this register should also be added to WA batch as required. 
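 * The flush-time half of that dependency is the KBL_REVID_E0 check
 * added to gen8_emit_flush_coherentl3_wa() earlier in this diff.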
+ */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) + /* WaDisableLSQCROPERFforOCL:kbl */ + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | + GEN8_LQSC_RO_PERF_DIS); + + /* WaInsertDummyPushConstPs:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableGafsUnitClkGating:kbl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaDisableSbeCacheDispatchPortSharing:kbl */ + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + + /* WaDisableLSQCROPERFforOCL:kbl */ + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + return 0; } @@ -1199,6 +1271,9 @@ int init_workarounds_ring(struct intel_engine_cs *engine) if (IS_BROXTON(dev)) return bxt_init_workarounds(engine); + if (IS_KABYLAKE(dev_priv)) + return kbl_init_workarounds(engine); + return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 300ea03be8f0..d1f248fd3506 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -552,7 +552,8 @@ nouveau_fbcon_init(struct drm_device *dev) if (ret) goto fini; - fbcon->helper.fbdev->pixmap.buf_align = 4; + if (fbcon->helper.fbdev) + fbcon->helper.fbdev->pixmap.buf_align = 4; return 0; fini: diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c index 22706c0a54b5..49bd5da194e1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c @@ -40,7 +40,8 @@ static int gf119_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern) { struct nvkm_device *device = outp->base.disp->engine.subdev.device; - nvkm_mask(device, 0x61c110, 0x0f0f0f0f, 0x01010101 * pattern); + const u32 soff = gf119_sor_soff(outp); + nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, 0x01010101 * pattern); return 0; } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c index e671a7cd3463..6ac717f2056f 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c @@ -148,40 +148,39 @@ static void rcar_du_vsp_plane_setup(struct rcar_du_vsp_plane *plane) struct rcar_du_vsp_plane_state *state = to_rcar_vsp_plane_state(plane->plane.state); struct drm_framebuffer *fb = plane->plane.state->fb; - struct v4l2_rect src; - struct v4l2_rect dst; - dma_addr_t paddr[2] = { 0, }; - u32 pixelformat = 0; + struct vsp1_du_atomic_config cfg = { + .pixelformat = 0, + .pitch = fb->pitches[0], + .alpha = state->alpha, + .zpos = state->zpos, + }; unsigned int i; - src.left = state->state.src_x >> 16; - src.top = state->state.src_y >> 16; - src.width = state->state.src_w >> 16; - src.height = state->state.src_h >> 16; + cfg.src.left = state->state.src_x >> 16; + cfg.src.top = state->state.src_y >> 16; + cfg.src.width = state->state.src_w >> 16; + cfg.src.height = state->state.src_h >> 16; - dst.left = state->state.crtc_x; - dst.top = state->state.crtc_y; - dst.width = state->state.crtc_w; - dst.height = state->state.crtc_h; + cfg.dst.left = state->state.crtc_x; + cfg.dst.top = state->state.crtc_y; + cfg.dst.width = state->state.crtc_w; + cfg.dst.height = state->state.crtc_h; for (i = 0; i < state->format->planes; ++i) { struct drm_gem_cma_object *gem; gem = drm_fb_cma_get_gem_obj(fb, i); - paddr[i] = gem->paddr + fb->offsets[i]; + cfg.mem[i] = gem->paddr + 
fb->offsets[i]; } for (i = 0; i < ARRAY_SIZE(formats_kms); ++i) { if (formats_kms[i] == state->format->fourcc) { - pixelformat = formats_v4l2[i]; + cfg.pixelformat = formats_v4l2[i]; break; } } - WARN_ON(!pixelformat); - - vsp1_du_atomic_update(plane->vsp->vsp, plane->index, pixelformat, - fb->pitches[0], paddr, &src, &dst); + vsp1_du_atomic_update(plane->vsp->vsp, plane->index, &cfg); } static int rcar_du_vsp_plane_atomic_check(struct drm_plane *plane, @@ -220,8 +219,7 @@ static void rcar_du_vsp_plane_atomic_update(struct drm_plane *plane, if (plane->state->crtc) rcar_du_vsp_plane_setup(rplane); else - vsp1_du_atomic_update(rplane->vsp->vsp, rplane->index, 0, 0, 0, - NULL, NULL); + vsp1_du_atomic_update(rplane->vsp->vsp, rplane->index, NULL); } static const struct drm_plane_helper_funcs rcar_du_vsp_plane_helper_funcs = { @@ -269,6 +267,7 @@ static void rcar_du_vsp_plane_reset(struct drm_plane *plane) return; state->alpha = 255; + state->zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : 1; plane->state = &state->state; plane->state->plane = plane; @@ -283,6 +282,8 @@ static int rcar_du_vsp_plane_atomic_set_property(struct drm_plane *plane, if (property == rcdu->props.alpha) rstate->alpha = val; + else if (property == rcdu->props.zpos) + rstate->zpos = val; else return -EINVAL; @@ -299,6 +300,8 @@ static int rcar_du_vsp_plane_atomic_get_property(struct drm_plane *plane, if (property == rcdu->props.alpha) *val = rstate->alpha; + else if (property == rcdu->props.zpos) + *val = rstate->zpos; else return -EINVAL; @@ -378,6 +381,8 @@ int rcar_du_vsp_init(struct rcar_du_vsp *vsp) drm_object_attach_property(&plane->plane.base, rcdu->props.alpha, 255); + drm_object_attach_property(&plane->plane.base, + rcdu->props.zpos, 1); } return 0; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.h b/drivers/gpu/drm/rcar-du/rcar_du_vsp.h index df3bf3805c69..510dcc9c6816 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.h @@ -44,6 +44,7 @@ static inline struct rcar_du_vsp_plane *to_rcar_vsp_plane(struct drm_plane *p) * @state: base DRM plane state * @format: information about the pixel format used by the plane * @alpha: value of the plane alpha property + * @zpos: value of the plane zpos property */ struct rcar_du_vsp_plane_state { struct drm_plane_state state; @@ -51,6 +52,7 @@ struct rcar_du_vsp_plane_state { const struct rcar_du_format_info *format; unsigned int alpha; + unsigned int zpos; }; static inline struct rcar_du_vsp_plane_state * diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig index 99510e64e91a..a4b357db8856 100644 --- a/drivers/gpu/drm/sun4i/Kconfig +++ b/drivers/gpu/drm/sun4i/Kconfig @@ -1,6 +1,6 @@ config DRM_SUN4I tristate "DRM Support for Allwinner A10 Display Engine" - depends on DRM && ARM + depends on DRM && ARM && COMMON_CLK depends on ARCH_SUNXI || COMPILE_TEST select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index f7a15c1a93bf..3ab560450a82 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -190,7 +190,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend, /* Get the physical address of the buffer in memory */ gem = drm_fb_cma_get_gem_obj(fb, 0); - DRM_DEBUG_DRIVER("Using GEM @ 0x%x\n", gem->paddr); + DRM_DEBUG_DRIVER("Using GEM @ %pad\n", &gem->paddr); /* Compute the start of the displayed memory */ bpp = drm_format_plane_cpp(fb->pixel_format, 0); @@ -198,7 +198,7 
@@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend, paddr += (state->src_x >> 16) * bpp; paddr += (state->src_y >> 16) * fb->pitches[0]; - DRM_DEBUG_DRIVER("Setting buffer address to 0x%x\n", paddr); + DRM_DEBUG_DRIVER("Setting buffer address to %pad\n", &paddr); /* Write the 32 lower bits of the address (in bits) */ lo_paddr = paddr << 3; diff --git a/drivers/gpu/drm/sun4i/sun4i_crtc.c b/drivers/gpu/drm/sun4i/sun4i_crtc.c index 4182a21f5923..41cacecbea9a 100644 --- a/drivers/gpu/drm/sun4i/sun4i_crtc.c +++ b/drivers/gpu/drm/sun4i/sun4i_crtc.c @@ -65,6 +65,14 @@ static void sun4i_crtc_disable(struct drm_crtc *crtc) DRM_DEBUG_DRIVER("Disabling the CRTC\n"); sun4i_tcon_disable(drv->tcon); + + if (crtc->state->event && !crtc->state->active) { + spin_lock_irq(&crtc->dev->event_lock); + drm_crtc_send_vblank_event(crtc, crtc->state->event); + spin_unlock_irq(&crtc->dev->event_lock); + + crtc->state->event = NULL; + } } static void sun4i_crtc_enable(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c index 3ff668cb463c..5b3463197c48 100644 --- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c +++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c @@ -72,14 +72,40 @@ static unsigned long sun4i_dclk_recalc_rate(struct clk_hw *hw, static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate) { - return *parent_rate / DIV_ROUND_CLOSEST(*parent_rate, rate); + unsigned long best_parent = 0; + u8 best_div = 1; + int i; + + for (i = 6; i < 127; i++) { + unsigned long ideal = rate * i; + unsigned long rounded; + + rounded = clk_hw_round_rate(clk_hw_get_parent(hw), + ideal); + + if (rounded == ideal) { + best_parent = rounded; + best_div = i; + goto out; + } + + if ((rounded < ideal) && (rounded > best_parent)) { + best_parent = rounded; + best_div = i; + } + } + +out: + *parent_rate = best_parent; + + return best_parent / best_div; } static int sun4i_dclk_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { struct sun4i_dclk *dclk = hw_to_dclk(hw); - int div = DIV_ROUND_CLOSEST(parent_rate, rate); + u8 div = parent_rate / rate; return regmap_update_bits(dclk->regmap, SUN4I_TCON0_DCLK_REG, GENMASK(6, 0), div); @@ -127,10 +153,14 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon) const char *clk_name, *parent_name; struct clk_init_data init; struct sun4i_dclk *dclk; + int ret; parent_name = __clk_get_name(tcon->sclk0); - of_property_read_string_index(dev->of_node, "clock-output-names", 0, - &clk_name); + ret = of_property_read_string_index(dev->of_node, + "clock-output-names", 0, + &clk_name); + if (ret) + return ret; dclk = devm_kzalloc(dev, sizeof(*dclk), GFP_KERNEL); if (!dclk) @@ -140,6 +170,7 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon) init.ops = &sun4i_dclk_ops; init.parent_names = &parent_name; init.num_parents = 1; + init.flags = CLK_SET_RATE_PARENT; dclk->regmap = tcon->regs; dclk->hw.init = &init; diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 76e922bb60e5..937394cbc241 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -24,34 +24,6 @@ #include "sun4i_layer.h" #include "sun4i_tcon.h" -static int sun4i_drv_connector_plug_all(struct drm_device *drm) -{ - struct drm_connector *connector, *failed; - int ret; - - mutex_lock(&drm->mode_config.mutex); - list_for_each_entry(connector, &drm->mode_config.connector_list, head) { - ret = 
drm_connector_register(connector); - if (ret) { - failed = connector; - goto err; - } - } - mutex_unlock(&drm->mode_config.mutex); - return 0; - -err: - list_for_each_entry(connector, &drm->mode_config.connector_list, head) { - if (failed == connector) - break; - - drm_connector_unregister(connector); - } - mutex_unlock(&drm->mode_config.mutex); - - return ret; -} - static int sun4i_drv_enable_vblank(struct drm_device *drm, unsigned int pipe) { struct sun4i_drv *drv = drm->dev_private; @@ -120,11 +92,27 @@ static struct drm_driver sun4i_drv_driver = { /* Frame Buffer Operations */ /* VBlank Operations */ - .get_vblank_counter = drm_vblank_count, + .get_vblank_counter = drm_vblank_no_hw_counter, .enable_vblank = sun4i_drv_enable_vblank, .disable_vblank = sun4i_drv_disable_vblank, }; +static void sun4i_remove_framebuffers(void) +{ + struct apertures_struct *ap; + + ap = alloc_apertures(1); + if (!ap) + return; + + /* The framebuffer can be located anywhere in RAM */ + ap->ranges[0].base = 0; + ap->ranges[0].size = ~0; + + remove_conflicting_framebuffers(ap, "sun4i-drm-fb", false); + kfree(ap); +} + static int sun4i_drv_bind(struct device *dev) { struct drm_device *drm; @@ -172,6 +160,9 @@ static int sun4i_drv_bind(struct device *dev) } drm->irq_enabled = true; + /* Remove early framebuffers (ie. simplefb) */ + sun4i_remove_framebuffers(); + /* Create our framebuffer */ drv->fbdev = sun4i_framebuffer_init(drm); if (IS_ERR(drv->fbdev)) { @@ -187,7 +178,7 @@ static int sun4i_drv_bind(struct device *dev) if (ret) goto free_drm; - ret = sun4i_drv_connector_plug_all(drm); + ret = drm_connector_register_all(drm); if (ret) goto unregister_drm; @@ -204,6 +195,7 @@ static void sun4i_drv_unbind(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); + drm_connector_unregister_all(drm); drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); sun4i_framebuffer_free(drm); @@ -318,6 +310,7 @@ static int sun4i_drv_probe(struct platform_device *pdev) count += sun4i_drv_add_endpoints(&pdev->dev, &match, pipeline); + of_node_put(pipeline); DRM_DEBUG_DRIVER("Queued %d outputs on pipeline %d\n", count, i); diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c index ab6494818050..aaffe9e64ffb 100644 --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c @@ -54,8 +54,13 @@ static int sun4i_rgb_get_modes(struct drm_connector *connector) static int sun4i_rgb_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct sun4i_rgb *rgb = drm_connector_to_sun4i_rgb(connector); + struct sun4i_drv *drv = rgb->drv; + struct sun4i_tcon *tcon = drv->tcon; u32 hsync = mode->hsync_end - mode->hsync_start; u32 vsync = mode->vsync_end - mode->vsync_start; + unsigned long rate = mode->clock * 1000; + long rounded_rate; DRM_DEBUG_DRIVER("Validating modes...\n"); @@ -87,6 +92,15 @@ static int sun4i_rgb_mode_valid(struct drm_connector *connector, DRM_DEBUG_DRIVER("Vertical parameters OK\n"); + rounded_rate = clk_round_rate(tcon->dclk, rate); + if (rounded_rate < rate) + return MODE_CLOCK_LOW; + + if (rounded_rate > rate) + return MODE_CLOCK_HIGH; + + DRM_DEBUG_DRIVER("Clock rate OK\n"); + return MODE_OK; } @@ -203,7 +217,7 @@ int sun4i_rgb_init(struct drm_device *drm) int ret; /* If we don't have a panel, there's no point in going on */ - if (!tcon->panel) + if (IS_ERR(tcon->panel)) return -ENODEV; rgb = devm_kzalloc(drm->dev, sizeof(*rgb), GFP_KERNEL); diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 
9f19b0e08560..652385f09735 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -425,11 +425,11 @@ static struct drm_panel *sun4i_tcon_find_panel(struct device_node *node) remote = of_graph_get_remote_port_parent(end_node); if (!remote) { - DRM_DEBUG_DRIVER("Enable to parse remote node\n"); + DRM_DEBUG_DRIVER("Unable to parse remote node\n"); return ERR_PTR(-EINVAL); } - return of_drm_find_panel(remote); + return of_drm_find_panel(remote) ?: ERR_PTR(-EPROBE_DEFER); } static int sun4i_tcon_bind(struct device *dev, struct device *master, @@ -490,7 +490,11 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master, return 0; } - return sun4i_rgb_init(drm); + ret = sun4i_rgb_init(drm); + if (ret < 0) + goto err_free_clocks; + + return 0; err_free_clocks: sun4i_tcon_free_clocks(tcon); @@ -522,12 +526,13 @@ static int sun4i_tcon_probe(struct platform_device *pdev) * Defer the probe. */ panel = sun4i_tcon_find_panel(node); - if (IS_ERR(panel)) { - /* - * If we don't have a panel endpoint, just go on - */ - if (PTR_ERR(panel) != -ENODEV) - return -EPROBE_DEFER; + + /* + * If we don't have a panel endpoint, just go on + */ + if (PTR_ERR(panel) == -EPROBE_DEFER) { + DRM_DEBUG_DRIVER("Still waiting for our panel. Deferring...\n"); + return -EPROBE_DEFER; } return component_add(&pdev->dev, &sun4i_tcon_ops); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 39386f50af87..a71cf98c655f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1034,9 +1034,9 @@ out_unlock: return ret; } -static bool ttm_bo_mem_compat(struct ttm_placement *placement, - struct ttm_mem_reg *mem, - uint32_t *new_flags) +bool ttm_bo_mem_compat(struct ttm_placement *placement, + struct ttm_mem_reg *mem, + uint32_t *new_flags) { int i; @@ -1068,6 +1068,7 @@ static bool ttm_bo_mem_compat(struct ttm_placement *placement, return false; } +EXPORT_SYMBOL(ttm_bo_mem_compat); int ttm_bo_validate(struct ttm_buffer_object *bo, struct ttm_placement *placement, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c index 9b078a493996..0cd889015dc5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c @@ -49,6 +49,7 @@ int vmw_dmabuf_pin_in_placement(struct vmw_private *dev_priv, { struct ttm_buffer_object *bo = &buf->base; int ret; + uint32_t new_flags; ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible); if (unlikely(ret != 0)) @@ -60,7 +61,12 @@ int vmw_dmabuf_pin_in_placement(struct vmw_private *dev_priv, if (unlikely(ret != 0)) goto err; - ret = ttm_bo_validate(bo, placement, interruptible, false); + if (buf->pin_count > 0) + ret = ttm_bo_mem_compat(placement, &bo->mem, + &new_flags) == true ? 0 : -EINVAL; + else + ret = ttm_bo_validate(bo, placement, interruptible, false); + if (!ret) vmw_bo_pin_reserved(buf, true); @@ -91,6 +97,7 @@ int vmw_dmabuf_pin_in_vram_or_gmr(struct vmw_private *dev_priv, { struct ttm_buffer_object *bo = &buf->base; int ret; + uint32_t new_flags; ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible); if (unlikely(ret != 0)) @@ -102,6 +109,12 @@ int vmw_dmabuf_pin_in_vram_or_gmr(struct vmw_private *dev_priv, if (unlikely(ret != 0)) goto err; + if (buf->pin_count > 0) { + ret = ttm_bo_mem_compat(&vmw_vram_gmr_placement, &bo->mem, + &new_flags) == true ? 
0 : -EINVAL; + goto out_unreserve; + } + ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, interruptible, false); if (likely(ret == 0) || ret == -ERESTARTSYS) @@ -161,6 +174,7 @@ int vmw_dmabuf_pin_in_start_of_vram(struct vmw_private *dev_priv, struct ttm_placement placement; struct ttm_place place; int ret = 0; + uint32_t new_flags; place = vmw_vram_placement.placement[0]; place.lpfn = bo->num_pages; @@ -185,10 +199,15 @@ int vmw_dmabuf_pin_in_start_of_vram(struct vmw_private *dev_priv, */ if (bo->mem.mem_type == TTM_PL_VRAM && bo->mem.start < bo->num_pages && - bo->mem.start > 0) + bo->mem.start > 0 && + buf->pin_count == 0) (void) ttm_bo_validate(bo, &vmw_sys_placement, false, false); - ret = ttm_bo_validate(bo, &placement, interruptible, false); + if (buf->pin_count > 0) + ret = ttm_bo_mem_compat(&placement, &bo->mem, + &new_flags) == true ? 0 : -EINVAL; + else + ret = ttm_bo_validate(bo, &placement, interruptible, false); /* For some reason we didn't end up at the start of vram */ WARN_ON(ret == 0 && bo->offset != 0); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 9fcd8200d485..8d528fcf6e96 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -233,6 +233,7 @@ static int vmw_force_iommu; static int vmw_restrict_iommu; static int vmw_force_coherent; static int vmw_restrict_dma_mask; +static int vmw_assume_16bpp; static int vmw_probe(struct pci_dev *, const struct pci_device_id *); static void vmw_master_init(struct vmw_master *); @@ -249,6 +250,8 @@ MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages"); module_param_named(force_coherent, vmw_force_coherent, int, 0600); MODULE_PARM_DESC(restrict_dma_mask, "Restrict DMA mask to 44 bits with IOMMU"); module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, 0600); +MODULE_PARM_DESC(assume_16bpp, "Assume 16-bpp when filtering modes"); +module_param_named(assume_16bpp, vmw_assume_16bpp, int, 0600); static void vmw_print_capabilities(uint32_t capabilities) @@ -660,6 +663,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) dev_priv->vram_start = pci_resource_start(dev->pdev, 1); dev_priv->mmio_start = pci_resource_start(dev->pdev, 2); + dev_priv->assume_16bpp = !!vmw_assume_16bpp; + dev_priv->enable_fb = enable_fbdev; vmw_write(dev_priv, SVGA_REG_ID, SVGA_ID_2); @@ -706,6 +711,13 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) vmw_read(dev_priv, SVGA_REG_SUGGESTED_GBOBJECT_MEM_SIZE_KB); + /* + * Workaround for low memory 2D VMs to compensate for the + * allocation taken by fbdev + */ + if (!(dev_priv->capabilities & SVGA_CAP_3D)) + mem_size *= 2; + dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE; dev_priv->prim_bb_mem = vmw_read(dev_priv, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 1980e2a28265..89fb19443a3f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -386,6 +386,7 @@ struct vmw_private { spinlock_t hw_lock; spinlock_t cap_lock; bool has_dx; + bool assume_16bpp; /* * VGA registers. diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index 679a4cb98ee3..d2d93959b119 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -517,28 +517,6 @@ static int vmw_fb_kms_framebuffer(struct fb_info *info) par->set_fb = &vfb->base; - if (!par->bo_ptr) { - /* - * Pin before mapping. 
Since we don't know in what placement - * to pin, call into KMS to do it for us. - */ - ret = vfb->pin(vfb); - if (ret) { - DRM_ERROR("Could not pin the fbdev framebuffer.\n"); - return ret; - } - - ret = ttm_bo_kmap(&par->vmw_bo->base, 0, - par->vmw_bo->base.num_pages, &par->map); - if (ret) { - vfb->unpin(vfb); - DRM_ERROR("Could not map the fbdev framebuffer.\n"); - return ret; - } - - par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &par->bo_iowrite); - } - return 0; } @@ -601,6 +579,31 @@ static int vmw_fb_set_par(struct fb_info *info) if (ret) goto out_unlock; + if (!par->bo_ptr) { + struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(set.fb); + + /* + * Pin before mapping. Since we don't know in what placement + * to pin, call into KMS to do it for us. + */ + ret = vfb->pin(vfb); + if (ret) { + DRM_ERROR("Could not pin the fbdev framebuffer.\n"); + goto out_unlock; + } + + ret = ttm_bo_kmap(&par->vmw_bo->base, 0, + par->vmw_bo->base.num_pages, &par->map); + if (ret) { + vfb->unpin(vfb); + DRM_ERROR("Could not map the fbdev framebuffer.\n"); + goto out_unlock; + } + + par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &par->bo_iowrite); + } + + vmw_fb_dirty_mark(par, par->fb_x, par->fb_y, par->set_fb->width, par->set_fb->height); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 55231cce73a0..e29da45a2847 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1553,14 +1553,10 @@ int vmw_du_connector_fill_modes(struct drm_connector *connector, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }; int i; - u32 assumed_bpp = 2; + u32 assumed_bpp = 4; - /* - * If using screen objects, then assume 32-bpp because that's what the - * SVGA device is assuming - */ - if (dev_priv->active_display_unit == vmw_du_screen_object) - assumed_bpp = 4; + if (dev_priv->assume_16bpp) + assumed_bpp = 2; if (dev_priv->active_display_unit == vmw_du_screen_target) { max_width = min(max_width, dev_priv->stdu_max_width); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index f0374f9b56ca..e57a0bad7a62 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -300,6 +300,9 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, break; } + if (retries == RETRIES) + return -EINVAL; + *msg_len = reply_len; *msg = reply; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 9ca818fb034c..41932a7c4f79 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -399,8 +399,10 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv, WARN_ON_ONCE(!stdu->defined); - if (!vfb->dmabuf && new_fb->width == mode->hdisplay && - new_fb->height == mode->vdisplay) + new_vfbs = (vfb->dmabuf) ? 
NULL : vmw_framebuffer_to_vfbs(new_fb); + + if (new_vfbs && new_vfbs->surface->base_size.width == mode->hdisplay && + new_vfbs->surface->base_size.height == mode->vdisplay) new_content_type = SAME_AS_DISPLAY; else if (vfb->dmabuf) new_content_type = SEPARATE_DMA; @@ -444,7 +446,6 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv, content_srf.mip_levels[0] = 1; content_srf.multisample_count = 0; } else { - new_vfbs = vmw_framebuffer_to_vfbs(new_fb); content_srf = *new_vfbs->surface; } @@ -464,7 +465,6 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv, return ret; } } else if (new_content_type == SAME_AS_DISPLAY) { - new_vfbs = vmw_framebuffer_to_vfbs(new_fb); new_display_srf = vmw_surface_reference(new_vfbs->surface); } diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 95b7d61d9910..fb6f1f447279 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -61,6 +61,7 @@ MODULE_LICENSE("GPL"); #define MT_QUIRK_ALWAYS_VALID (1 << 4) #define MT_QUIRK_VALID_IS_INRANGE (1 << 5) #define MT_QUIRK_VALID_IS_CONFIDENCE (1 << 6) +#define MT_QUIRK_CONFIDENCE (1 << 7) #define MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE (1 << 8) #define MT_QUIRK_NO_AREA (1 << 9) #define MT_QUIRK_IGNORE_DUPLICATES (1 << 10) @@ -78,6 +79,7 @@ struct mt_slot { __s32 contactid; /* the device ContactID assigned to this slot */ bool touch_state; /* is the touch valid? */ bool inrange_state; /* is the finger in proximity of the sensor? */ + bool confidence_state; /* is the touch made by a finger? */ }; struct mt_class { @@ -503,10 +505,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, return 1; case HID_DG_CONFIDENCE: if (cls->name == MT_CLS_WIN_8 && - field->application == HID_DG_TOUCHPAD) { - cls->quirks &= ~MT_QUIRK_ALWAYS_VALID; - cls->quirks |= MT_QUIRK_VALID_IS_CONFIDENCE; - } + field->application == HID_DG_TOUCHPAD) + cls->quirks |= MT_QUIRK_CONFIDENCE; mt_store_field(usage, td, hi); return 1; case HID_DG_TIPSWITCH: @@ -619,6 +619,7 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input) return; if (td->curvalid || (td->mtclass.quirks & MT_QUIRK_ALWAYS_VALID)) { + int active; int slotnum = mt_compute_slot(td, input); struct mt_slot *s = &td->curdata; struct input_mt *mt = input->mt; @@ -633,10 +634,14 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input) return; } + if (!(td->mtclass.quirks & MT_QUIRK_CONFIDENCE)) + s->confidence_state = 1; + active = (s->touch_state || s->inrange_state) && + s->confidence_state; + input_mt_slot(input, slotnum); - input_mt_report_slot_state(input, MT_TOOL_FINGER, - s->touch_state || s->inrange_state); - if (s->touch_state || s->inrange_state) { + input_mt_report_slot_state(input, MT_TOOL_FINGER, active); + if (active) { /* this finger is in proximity of the sensor */ int wide = (s->w > s->h); /* divided by two to match visual scale of touch */ @@ -701,6 +706,8 @@ static void mt_process_mt_event(struct hid_device *hid, struct hid_field *field, td->curdata.touch_state = value; break; case HID_DG_CONFIDENCE: + if (quirks & MT_QUIRK_CONFIDENCE) + td->curdata.confidence_state = value; if (quirks & MT_QUIRK_VALID_IS_CONFIDENCE) td->curvalid = value; break; diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 2f1ddca6f2e0..700145b15088 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -516,13 +516,13 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, goto inval; } else 
if (uref->usage_index >= field->report_count) goto inval; - - else if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) && - (uref_multi->num_values > HID_MAX_MULTI_USAGES || - uref->usage_index + uref_multi->num_values > field->report_count)) - goto inval; } + if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) && + (uref_multi->num_values > HID_MAX_MULTI_USAGES || + uref->usage_index + uref_multi->num_values > field->report_count)) + goto inval; + switch (cmd) { case HIDIOCGUSAGE: uref->value = field->value[uref->usage_index]; diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index ff940075bb90..eaf2f916d48c 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -486,6 +486,18 @@ config SENSORS_FSCHMD This driver can also be built as a module. If so, the module will be called fschmd. +config SENSORS_FTSTEUTATES + tristate "Fujitsu Technology Solutions sensor chip Teutates" + depends on I2C && WATCHDOG + select WATCHDOG_CORE + help + If you say yes here you get support for the Fujitsu Technology + Solutions (FTS) sensor chip "Teutates" including support for + the integrated watchdog. + + This driver can also be built as a module. If so, the module + will be called ftsteutates. + config SENSORS_GL518SM tristate "Genesys Logic GL518SM" depends on I2C @@ -645,8 +657,8 @@ config SENSORS_JC42 temperature sensors, which are used on many DDR3 memory modules for mobile devices and servers. Support will include, but not be limited to, ADT7408, AT30TS00, CAT34TS02, CAT6095, MAX6604, MCP9804, MCP9805, - MCP98242, MCP98243, MCP98244, MCP9843, SE97, SE98, STTS424(E), - STTS2002, STTS3000, TSE2002, TSE2004, TS3000, and TS3001. + MCP9808, MCP98242, MCP98243, MCP98244, MCP9843, SE97, SE98, + STTS424(E), STTS2002, STTS3000, TSE2002, TSE2004, TS3000, and TS3001. This driver can also be built as a module. If so, the module will be called jc42. @@ -958,6 +970,7 @@ config SENSORS_LM75 tristate "National Semiconductor LM75 and compatibles" depends on I2C depends on THERMAL || !THERMAL_OF + select REGMAP_I2C help If you say yes here you get support for one common type of temperature sensor chip, with models including: @@ -1265,6 +1278,17 @@ config SENSORS_SHT21 This driver can also be built as a module. If so, the module will be called sht21. +config SENSORS_SHT3x + tristate "Sensiron humidity and temperature sensors. SHT3x and compat." + depends on I2C + select CRC8 + help + If you say yes here you get support for the Sensiron SHT30 and SHT31 + humidity and temperature sensors. + + This driver can also be built as a module. If so, the module + will be called sht3x. + config SENSORS_SHTC1 tristate "Sensiron humidity and temperature sensors. SHTC1 and compat." depends on I2C @@ -1514,6 +1538,17 @@ config SENSORS_INA2XX This driver can also be built as a module. If so, the module will be called ina2xx. +config SENSORS_INA3221 + tristate "Texas Instruments INA3221 Triple Power Monitor" + depends on I2C + select REGMAP_I2C + help + If you say yes here you get support for the TI INA3221 Triple Power + Monitor. + + This driver can also be built as a module. If so, the module + will be called ina3221. + config SENSORS_TC74 tristate "Microchip TC74" depends on I2C @@ -1538,6 +1573,7 @@ config SENSORS_TMP102 tristate "Texas Instruments TMP102" depends on I2C depends on THERMAL || !THERMAL_OF + select REGMAP_I2C help If you say yes here you get support for Texas Instruments TMP102 sensor chips. 
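
The hiddev change earlier in this patch (drivers/hid/usbhid/hiddev.c) is the interesting one from a robustness standpoint: the HIDIOCGUSAGES/HIDIOCSUSAGES range test used to sit in an else-chain where one code path skipped it, so a multi-usage request could index past field->value[]. Hoisting the test out of the else-branch makes it unconditional. A minimal sketch of the same guard in isolation follows; range_ok(), cap, idx and n are illustrative names, not taken from hiddev:

#include <linux/errno.h>
#include <linux/types.h>

/*
 * Reject any idx/n pair that would touch elements beyond an array
 * holding 'cap' entries. Written as (n > cap - idx) rather than
 * (idx + n > cap) so a large caller-supplied n cannot wrap around.
 */
static int range_ok(size_t cap, size_t idx, size_t n)
{
	if (idx >= cap)
		return -EINVAL;
	if (n > cap - idx)
		return -EINVAL;
	return 0;
}
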
@@ -1561,7 +1597,7 @@ config SENSORS_TMP401 depends on I2C help If you say yes here you get support for Texas Instruments TMP401, - TMP411, TMP431, TMP432 and TMP435 temperature sensor chips. + TMP411, TMP431, TMP432, TMP435, and TMP461 temperature sensor chips. This driver can also be built as a module. If so, the module will be called tmp401. diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 2ef5b7c4c54f..fe87d2895a97 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_SENSORS_F71882FG) += f71882fg.o obj-$(CONFIG_SENSORS_F75375S) += f75375s.o obj-$(CONFIG_SENSORS_FAM15H_POWER) += fam15h_power.o obj-$(CONFIG_SENSORS_FSCHMD) += fschmd.o +obj-$(CONFIG_SENSORS_FTSTEUTATES) += ftsteutates.o obj-$(CONFIG_SENSORS_G760A) += g760a.o obj-$(CONFIG_SENSORS_G762) += g762.o obj-$(CONFIG_SENSORS_GL518SM) += gl518sm.o @@ -77,6 +78,7 @@ obj-$(CONFIG_SENSORS_IBMPOWERNV)+= ibmpowernv.o obj-$(CONFIG_SENSORS_IIO_HWMON) += iio_hwmon.o obj-$(CONFIG_SENSORS_INA209) += ina209.o obj-$(CONFIG_SENSORS_INA2XX) += ina2xx.o +obj-$(CONFIG_SENSORS_INA3221) += ina3221.o obj-$(CONFIG_SENSORS_IT87) += it87.o obj-$(CONFIG_SENSORS_JC42) += jc42.o obj-$(CONFIG_SENSORS_JZ4740) += jz4740-hwmon.o @@ -138,6 +140,7 @@ obj-$(CONFIG_SENSORS_SCH5627) += sch5627.o obj-$(CONFIG_SENSORS_SCH5636) += sch5636.o obj-$(CONFIG_SENSORS_SHT15) += sht15.o obj-$(CONFIG_SENSORS_SHT21) += sht21.o +obj-$(CONFIG_SENSORS_SHT3x) += sht3x.o obj-$(CONFIG_SENSORS_SHTC1) += shtc1.o obj-$(CONFIG_SENSORS_SIS5595) += sis5595.o obj-$(CONFIG_SENSORS_SMM665) += smm665.o diff --git a/drivers/hwmon/ad7314.c b/drivers/hwmon/ad7314.c index 202c1fbb3407..8ea35932fbaa 100644 --- a/drivers/hwmon/ad7314.c +++ b/drivers/hwmon/ad7314.c @@ -37,7 +37,6 @@ enum ad7314_variant { struct ad7314_data { struct spi_device *spi_dev; - struct device *hwmon_dev; u16 rx ____cacheline_aligned; }; @@ -88,62 +87,30 @@ static ssize_t ad7314_show_temperature(struct device *dev, } } -static ssize_t ad7314_show_name(struct device *dev, - struct device_attribute *devattr, char *buf) -{ - return sprintf(buf, "%s\n", to_spi_device(dev)->modalias); -} - -static DEVICE_ATTR(name, S_IRUGO, ad7314_show_name, NULL); static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, ad7314_show_temperature, NULL, 0); -static struct attribute *ad7314_attributes[] = { - &dev_attr_name.attr, +static struct attribute *ad7314_attrs[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, NULL, }; -static const struct attribute_group ad7314_group = { - .attrs = ad7314_attributes, -}; +ATTRIBUTE_GROUPS(ad7314); static int ad7314_probe(struct spi_device *spi_dev) { - int ret; struct ad7314_data *chip; + struct device *hwmon_dev; chip = devm_kzalloc(&spi_dev->dev, sizeof(*chip), GFP_KERNEL); if (chip == NULL) return -ENOMEM; - spi_set_drvdata(spi_dev, chip); - - ret = sysfs_create_group(&spi_dev->dev.kobj, &ad7314_group); - if (ret < 0) - return ret; - - chip->hwmon_dev = hwmon_device_register(&spi_dev->dev); - if (IS_ERR(chip->hwmon_dev)) { - ret = PTR_ERR(chip->hwmon_dev); - goto error_remove_group; - } chip->spi_dev = spi_dev; - - return 0; -error_remove_group: - sysfs_remove_group(&spi_dev->dev.kobj, &ad7314_group); - return ret; -} - -static int ad7314_remove(struct spi_device *spi_dev) -{ - struct ad7314_data *chip = spi_get_drvdata(spi_dev); - - hwmon_device_unregister(chip->hwmon_dev); - sysfs_remove_group(&spi_dev->dev.kobj, &ad7314_group); - - return 0; + hwmon_dev = devm_hwmon_device_register_with_groups(&spi_dev->dev, + spi_dev->modalias, + chip, ad7314_groups); 
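/*
 * The devm_hwmon_device_register_with_groups() call above is what lets
 * this patch drop the sysfs_create_group() error unwinding and the whole
 * ad7314_remove() callback: the hwmon device and its attribute groups are
 * torn down automatically when the SPI device goes away. Reduced to its
 * bare shape (my_probe and my_groups are illustrative names only), such a
 * probe becomes:
 *
 *	static int my_probe(struct spi_device *spi)
 *	{
 *		struct device *hwmon_dev;
 *
 *		hwmon_dev = devm_hwmon_device_register_with_groups(
 *				&spi->dev, "my_chip", NULL, my_groups);
 *		return PTR_ERR_OR_ZERO(hwmon_dev);
 *	}
 */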
+ return PTR_ERR_OR_ZERO(hwmon_dev); } static const struct spi_device_id ad7314_id[] = { @@ -159,7 +126,6 @@ static struct spi_driver ad7314_driver = { .name = "ad7314", }, .probe = ad7314_probe, - .remove = ad7314_remove, .id_table = ad7314_id, }; diff --git a/drivers/hwmon/ads7871.c b/drivers/hwmon/ads7871.c index 4fd9e4de1972..59bd7b9e1772 100644 --- a/drivers/hwmon/ads7871.c +++ b/drivers/hwmon/ads7871.c @@ -66,14 +66,12 @@ #include #include #include -#include #include #define DEVICE_NAME "ads7871" struct ads7871_data { - struct device *hwmon_dev; - struct mutex update_lock; + struct spi_device *spi; }; static int ads7871_read_reg8(struct spi_device *spi, int reg) @@ -101,7 +99,8 @@ static int ads7871_write_reg8(struct spi_device *spi, int reg, u8 val) static ssize_t show_voltage(struct device *dev, struct device_attribute *da, char *buf) { - struct spi_device *spi = to_spi_device(dev); + struct ads7871_data *pdata = dev_get_drvdata(dev); + struct spi_device *spi = pdata->spi; struct sensor_device_attribute *attr = to_sensor_dev_attr(da); int ret, val, i = 0; uint8_t channel, mux_cnv; @@ -139,12 +138,6 @@ static ssize_t show_voltage(struct device *dev, } } -static ssize_t ads7871_show_name(struct device *dev, - struct device_attribute *devattr, char *buf) -{ - return sprintf(buf, "%s\n", to_spi_device(dev)->modalias); -} - static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, show_voltage, NULL, 0); static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, show_voltage, NULL, 1); static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, show_voltage, NULL, 2); @@ -154,9 +147,7 @@ static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, show_voltage, NULL, 5); static SENSOR_DEVICE_ATTR(in6_input, S_IRUGO, show_voltage, NULL, 6); static SENSOR_DEVICE_ATTR(in7_input, S_IRUGO, show_voltage, NULL, 7); -static DEVICE_ATTR(name, S_IRUGO, ads7871_show_name, NULL); - -static struct attribute *ads7871_attributes[] = { +static struct attribute *ads7871_attrs[] = { &sensor_dev_attr_in0_input.dev_attr.attr, &sensor_dev_attr_in1_input.dev_attr.attr, &sensor_dev_attr_in2_input.dev_attr.attr, @@ -165,21 +156,18 @@ static struct attribute *ads7871_attributes[] = { &sensor_dev_attr_in5_input.dev_attr.attr, &sensor_dev_attr_in6_input.dev_attr.attr, &sensor_dev_attr_in7_input.dev_attr.attr, - &dev_attr_name.attr, NULL }; -static const struct attribute_group ads7871_group = { - .attrs = ads7871_attributes, -}; +ATTRIBUTE_GROUPS(ads7871); static int ads7871_probe(struct spi_device *spi) { - int ret, err; + struct device *dev = &spi->dev; + int ret; uint8_t val; struct ads7871_data *pdata; - - dev_dbg(&spi->dev, "probe\n"); + struct device *hwmon_dev; /* Configure the SPI bus */ spi->mode = (SPI_MODE_0); @@ -193,7 +181,7 @@ static int ads7871_probe(struct spi_device *spi) ads7871_write_reg8(spi, REG_OSC_CONTROL, val); ret = ads7871_read_reg8(spi, REG_OSC_CONTROL); - dev_dbg(&spi->dev, "REG_OSC_CONTROL write:%x, read:%x\n", val, ret); + dev_dbg(dev, "REG_OSC_CONTROL write:%x, read:%x\n", val, ret); /* * because there is no other error checking on an SPI bus * we need to make sure we really have a chip @@ -201,46 +189,23 @@ static int ads7871_probe(struct spi_device *spi) if (val != ret) return -ENODEV; - pdata = devm_kzalloc(&spi->dev, sizeof(struct ads7871_data), - GFP_KERNEL); + pdata = devm_kzalloc(dev, sizeof(struct ads7871_data), GFP_KERNEL); if (!pdata) return -ENOMEM; - err = sysfs_create_group(&spi->dev.kobj, &ads7871_group); - if (err < 0) - return err; - - spi_set_drvdata(spi, pdata); + pdata->spi = spi; - pdata->hwmon_dev = 
hwmon_device_register(&spi->dev); - if (IS_ERR(pdata->hwmon_dev)) { - err = PTR_ERR(pdata->hwmon_dev); - goto error_remove; - } - - return 0; - -error_remove: - sysfs_remove_group(&spi->dev.kobj, &ads7871_group); - return err; -} - -static int ads7871_remove(struct spi_device *spi) -{ - struct ads7871_data *pdata = spi_get_drvdata(spi); - - hwmon_device_unregister(pdata->hwmon_dev); - sysfs_remove_group(&spi->dev.kobj, &ads7871_group); - return 0; + hwmon_dev = devm_hwmon_device_register_with_groups(dev, spi->modalias, + pdata, + ads7871_groups); + return PTR_ERR_OR_ZERO(hwmon_dev); } static struct spi_driver ads7871_driver = { .driver = { .name = DEVICE_NAME, }, - .probe = ads7871_probe, - .remove = ads7871_remove, }; module_spi_driver(ads7871_driver); diff --git a/drivers/hwmon/adt7411.c b/drivers/hwmon/adt7411.c index 827c03703128..a7f886961830 100644 --- a/drivers/hwmon/adt7411.c +++ b/drivers/hwmon/adt7411.c @@ -30,6 +30,7 @@ #define ADT7411_REG_CFG1 0x18 #define ADT7411_CFG1_START_MONITOR (1 << 0) +#define ADT7411_CFG1_RESERVED_BIT3 (1 << 3) #define ADT7411_REG_CFG2 0x19 #define ADT7411_CFG2_DISABLE_AVG (1 << 5) @@ -296,8 +297,10 @@ static int adt7411_probe(struct i2c_client *client, mutex_init(&data->device_lock); mutex_init(&data->update_lock); + /* According to the datasheet, we must only write 1 to bit 3 */ ret = adt7411_modify_bit(client, ADT7411_REG_CFG1, - ADT7411_CFG1_START_MONITOR, 1); + ADT7411_CFG1_RESERVED_BIT3 + | ADT7411_CFG1_START_MONITOR, 1); if (ret < 0) return ret; diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index c43318d3416e..acf9c0361d9f 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -66,11 +67,13 @@ static DEFINE_MUTEX(i8k_mutex); static char bios_version[4]; +static char bios_machineid[16]; static struct device *i8k_hwmon_dev; static u32 i8k_hwmon_flags; static uint i8k_fan_mult = I8K_FAN_MULT; static uint i8k_pwm_mult; static uint i8k_fan_max = I8K_FAN_HIGH; +static bool disallow_fan_type_call; #define I8K_HWMON_HAVE_TEMP1 (1 << 0) #define I8K_HWMON_HAVE_TEMP2 (1 << 1) @@ -78,6 +81,7 @@ static uint i8k_fan_max = I8K_FAN_HIGH; #define I8K_HWMON_HAVE_TEMP4 (1 << 3) #define I8K_HWMON_HAVE_FAN1 (1 << 4) #define I8K_HWMON_HAVE_FAN2 (1 << 5) +#define I8K_HWMON_HAVE_FAN3 (1 << 6) MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)"); MODULE_AUTHOR("Pali Rohár "); @@ -94,13 +98,13 @@ module_param(ignore_dmi, bool, 0); MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match"); #if IS_ENABLED(CONFIG_I8K) -static bool restricted; +static bool restricted = true; module_param(restricted, bool, 0); -MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set"); +MODULE_PARM_DESC(restricted, "Restrict fan control and serial number to CAP_SYS_ADMIN (default: 1)"); static bool power_status; module_param(power_status, bool, 0600); -MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k"); +MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k (default: 0)"); #endif static uint fan_mult; @@ -136,6 +140,14 @@ static int i8k_smm(struct smm_regs *regs) int eax = regs->eax; cpumask_var_t old_mask; +#ifdef DEBUG + int ebx = regs->ebx; + unsigned long duration; + ktime_t calltime, delta, rettime; + + calltime = ktime_get(); +#endif + /* SMM requires CPU 0 */ if (!alloc_cpumask_var(&old_mask, GFP_KERNEL)) return -ENOMEM; @@ -207,6 +219,15 @@ static int i8k_smm(struct smm_regs *regs) out: 
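/*
 * The #ifdef DEBUG block added to i8k_smm() in this hunk brackets the SMM
 * call with ktime_get() and prints the delta; the ">> 10" turns
 * nanoseconds into approximate microseconds by dividing by 1024 instead
 * of 1000, which is close enough for a debug printout. The bare timing
 * pattern, with a hypothetical do_work() standing in for the SMM call:
 *
 *	ktime_t t0 = ktime_get();
 *
 *	do_work();
 *	pr_debug("took %lld ns\n",
 *		 ktime_to_ns(ktime_sub(ktime_get(), t0)));
 */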
set_cpus_allowed_ptr(current, old_mask); free_cpumask_var(old_mask); + +#ifdef DEBUG + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = ktime_to_ns(delta) >> 10; + pr_debug("smm(0x%.4x 0x%.4x) = 0x%.4x (took %7lu usecs)\n", eax, ebx, + (rc ? 0xffff : regs->eax & 0xffff), duration); +#endif + return rc; } @@ -235,14 +256,28 @@ static int i8k_get_fan_speed(int fan) /* * Read the fan type. */ -static int i8k_get_fan_type(int fan) +static int _i8k_get_fan_type(int fan) { struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, }; + if (disallow_fan_type_call) + return -EINVAL; + regs.ebx = fan & 0xff; return i8k_smm(®s) ? : regs.eax & 0xff; } +static int i8k_get_fan_type(int fan) +{ + /* I8K_SMM_GET_FAN_TYPE SMM call is expensive, so cache values */ + static int types[3] = { INT_MIN, INT_MIN, INT_MIN }; + + if (types[fan] == INT_MIN) + types[fan] = _i8k_get_fan_type(fan); + + return types[fan]; +} + /* * Read the fan nominal rpm for specific fan speed. */ @@ -387,14 +422,20 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg) switch (cmd) { case I8K_BIOS_VERSION: + if (!isdigit(bios_version[0]) || !isdigit(bios_version[1]) || + !isdigit(bios_version[2])) + return -EINVAL; + val = (bios_version[0] << 16) | (bios_version[1] << 8) | bios_version[2]; break; case I8K_MACHINE_ID: - memset(buff, 0, 16); - strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL), - sizeof(buff)); + if (restricted && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + memset(buff, 0, sizeof(buff)); + strlcpy(buff, bios_machineid, sizeof(buff)); break; case I8K_FN_STATUS: @@ -511,7 +552,7 @@ static int i8k_proc_show(struct seq_file *seq, void *offset) seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n", I8K_PROC_FMT, bios_version, - i8k_get_dmi_data(DMI_PRODUCT_SERIAL), + (restricted && !capable(CAP_SYS_ADMIN)) ? 
"-1" : bios_machineid, cpu_temp, left_fan, right_fan, left_speed, right_speed, ac_power, fn_key); @@ -696,6 +737,12 @@ static SENSOR_DEVICE_ATTR(fan2_label, S_IRUGO, i8k_hwmon_show_fan_label, NULL, 1); static SENSOR_DEVICE_ATTR(pwm2, S_IRUGO | S_IWUSR, i8k_hwmon_show_pwm, i8k_hwmon_set_pwm, 1); +static SENSOR_DEVICE_ATTR(fan3_input, S_IRUGO, i8k_hwmon_show_fan, NULL, + 2); +static SENSOR_DEVICE_ATTR(fan3_label, S_IRUGO, i8k_hwmon_show_fan_label, NULL, + 2); +static SENSOR_DEVICE_ATTR(pwm3, S_IRUGO | S_IWUSR, i8k_hwmon_show_pwm, + i8k_hwmon_set_pwm, 2); static struct attribute *i8k_attrs[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, /* 0 */ @@ -712,12 +759,18 @@ static struct attribute *i8k_attrs[] = { &sensor_dev_attr_fan2_input.dev_attr.attr, /* 11 */ &sensor_dev_attr_fan2_label.dev_attr.attr, /* 12 */ &sensor_dev_attr_pwm2.dev_attr.attr, /* 13 */ + &sensor_dev_attr_fan3_input.dev_attr.attr, /* 14 */ + &sensor_dev_attr_fan3_label.dev_attr.attr, /* 15 */ + &sensor_dev_attr_pwm3.dev_attr.attr, /* 16 */ NULL }; static umode_t i8k_is_visible(struct kobject *kobj, struct attribute *attr, int index) { + if (disallow_fan_type_call && + (index == 9 || index == 12 || index == 15)) + return 0; if (index >= 0 && index <= 1 && !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP1)) return 0; @@ -736,6 +789,9 @@ static umode_t i8k_is_visible(struct kobject *kobj, struct attribute *attr, if (index >= 11 && index <= 13 && !(i8k_hwmon_flags & I8K_HWMON_HAVE_FAN2)) return 0; + if (index >= 14 && index <= 16 && + !(i8k_hwmon_flags & I8K_HWMON_HAVE_FAN3)) + return 0; return attr->mode; } @@ -767,16 +823,27 @@ static int __init i8k_init_hwmon(void) if (err >= 0) i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP4; - /* First fan attributes, if fan type is OK */ - err = i8k_get_fan_type(0); + /* First fan attributes, if fan status or type is OK */ + err = i8k_get_fan_status(0); + if (err < 0) + err = i8k_get_fan_type(0); if (err >= 0) i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN1; - /* Second fan attributes, if fan type is OK */ - err = i8k_get_fan_type(1); + /* Second fan attributes, if fan status or type is OK */ + err = i8k_get_fan_status(1); + if (err < 0) + err = i8k_get_fan_type(1); if (err >= 0) i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2; + /* Third fan attributes, if fan status or type is OK */ + err = i8k_get_fan_status(2); + if (err < 0) + err = i8k_get_fan_type(2); + if (err >= 0) + i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN3; + i8k_hwmon_dev = hwmon_device_register_with_groups(NULL, "dell_smm", NULL, i8k_groups); if (IS_ERR(i8k_hwmon_dev)) { @@ -929,12 +996,14 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = { MODULE_DEVICE_TABLE(dmi, i8k_dmi_table); -static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = { +/* + * On some machines once I8K_SMM_GET_FAN_TYPE is issued then CPU fan speed + * randomly going up and down due to bug in Dell SMM or BIOS. Here is blacklist + * of affected Dell machines for which we disallow I8K_SMM_GET_FAN_TYPE call. + * See bug: https://bugzilla.kernel.org/show_bug.cgi?id=100121 + */ +static struct dmi_system_id i8k_blacklist_fan_type_dmi_table[] __initdata = { { - /* - * CPU fan speed going up and down on Dell Studio XPS 8000 - * for unknown reasons. - */ .ident = "Dell Studio XPS 8000", .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."), @@ -942,16 +1011,19 @@ static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = { }, }, { - /* - * CPU fan speed going up and down on Dell Studio XPS 8100 - * for unknown reasons. 
- */ .ident = "Dell Studio XPS 8100", .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8100"), }, }, + { + .ident = "Dell Inspiron 580", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 580 "), + }, + }, { } }; @@ -966,8 +1038,7 @@ static int __init i8k_probe(void) /* * Get DMI information */ - if (!dmi_check_system(i8k_dmi_table) || - dmi_check_system(i8k_blacklist_dmi_table)) { + if (!dmi_check_system(i8k_dmi_table)) { if (!ignore_dmi && !force) return -ENODEV; @@ -978,8 +1049,13 @@ static int __init i8k_probe(void) i8k_get_dmi_data(DMI_BIOS_VERSION)); } + if (dmi_check_system(i8k_blacklist_fan_type_dmi_table)) + disallow_fan_type_call = true; + strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION), sizeof(bios_version)); + strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL), + sizeof(bios_machineid)); /* * Get SMM Dell signature diff --git a/drivers/hwmon/emc6w201.c b/drivers/hwmon/emc6w201.c index ada90716448d..f37fe2011640 100644 --- a/drivers/hwmon/emc6w201.c +++ b/drivers/hwmon/emc6w201.c @@ -464,7 +464,7 @@ static int emc6w201_detect(struct i2c_client *client, if (verstep < 0 || (verstep & 0xF0) != 0xB0) return -ENODEV; if ((verstep & 0x0F) > 2) { - dev_dbg(&client->dev, "Unknwown EMC6W201 stepping %d\n", + dev_dbg(&client->dev, "Unknown EMC6W201 stepping %d\n", verstep & 0x0F); return -ENODEV; } diff --git a/drivers/hwmon/ftsteutates.c b/drivers/hwmon/ftsteutates.c new file mode 100644 index 000000000000..2b2ff67026be --- /dev/null +++ b/drivers/hwmon/ftsteutates.c @@ -0,0 +1,819 @@ +/* + * Support for the FTS Systemmonitoring Chip "Teutates" + * + * Copyright (C) 2016 Fujitsu Technology Solutions GmbH, + * Thilo Cestonaro + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ */
+#include <linux/err.h>
+#include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/watchdog.h>
+
+#define FTS_DEVICE_ID_REG 0x0000
+#define FTS_DEVICE_REVISION_REG 0x0001
+#define FTS_DEVICE_STATUS_REG 0x0004
+#define FTS_SATELLITE_STATUS_REG 0x0005
+#define FTS_EVENT_STATUS_REG 0x0006
+#define FTS_GLOBAL_CONTROL_REG 0x0007
+
+#define FTS_SENSOR_EVENT_REG 0x0010
+
+#define FTS_FAN_EVENT_REG 0x0014
+#define FTS_FAN_PRESENT_REG 0x0015
+
+#define FTS_POWER_ON_TIME_COUNTER_A 0x007A
+#define FTS_POWER_ON_TIME_COUNTER_B 0x007B
+#define FTS_POWER_ON_TIME_COUNTER_C 0x007C
+
+#define FTS_PAGE_SELECT_REG 0x007F
+
+#define FTS_WATCHDOG_TIME_PRESET 0x000B
+#define FTS_WATCHDOG_CONTROL 0x5081
+
+#define FTS_NO_FAN_SENSORS 0x08
+#define FTS_NO_TEMP_SENSORS 0x10
+#define FTS_NO_VOLT_SENSORS 0x04
+
+static struct i2c_device_id fts_id[] = {
+    { "ftsteutates", 0 },
+    { }
+};
+MODULE_DEVICE_TABLE(i2c, fts_id);
+
+enum WATCHDOG_RESOLUTION {
+    seconds = 1,
+    minutes = 60
+};
+
+struct fts_data {
+    struct i2c_client *client;
+    /* update sensor data lock */
+    struct mutex update_lock;
+    /* read/write register lock */
+    struct mutex access_lock;
+    unsigned long last_updated; /* in jiffies */
+    struct watchdog_device wdd;
+    enum WATCHDOG_RESOLUTION resolution;
+    bool valid; /* false until following fields are valid */
+
+    u8 volt[FTS_NO_VOLT_SENSORS];
+
+    u8 temp_input[FTS_NO_TEMP_SENSORS];
+    u8 temp_alarm;
+
+    u8 fan_present;
+    u8 fan_input[FTS_NO_FAN_SENSORS]; /* in rps */
+    u8 fan_source[FTS_NO_FAN_SENSORS];
+    u8 fan_alarm;
+};
+
+#define FTS_REG_FAN_INPUT(idx) ((idx) + 0x20)
+#define FTS_REG_FAN_SOURCE(idx) ((idx) + 0x30)
+#define FTS_REG_FAN_CONTROL(idx) (((idx) << 16) + 0x4881)
+
+#define FTS_REG_TEMP_INPUT(idx) ((idx) + 0x40)
+#define FTS_REG_TEMP_CONTROL(idx) (((idx) << 16) + 0x0681)
+
+#define FTS_REG_VOLT(idx) ((idx) + 0x18)
+
+/*****************************************************************************/
+/* I2C Helper functions */
+/*****************************************************************************/
+static int fts_read_byte(struct i2c_client *client, unsigned short reg)
+{
+    int ret;
+    unsigned char page = reg >> 8;
+    struct fts_data *data = dev_get_drvdata(&client->dev);
+
+    mutex_lock(&data->access_lock);
+
+    dev_dbg(&client->dev, "page select - page: 0x%.02x\n", page);
+    ret = i2c_smbus_write_byte_data(client, FTS_PAGE_SELECT_REG, page);
+    if (ret < 0)
+        goto error;
+
+    reg &= 0xFF;
+    ret = i2c_smbus_read_byte_data(client, reg);
+    dev_dbg(&client->dev, "read - reg: 0x%.02x: val: 0x%.02x\n", reg, ret);
+
+error:
+    mutex_unlock(&data->access_lock);
+    return ret;
+}
+
+static int fts_write_byte(struct i2c_client *client, unsigned short reg,
+                          unsigned char value)
+{
+    int ret;
+    unsigned char page = reg >> 8;
+    struct fts_data *data = dev_get_drvdata(&client->dev);
+
+    mutex_lock(&data->access_lock);
+
+    dev_dbg(&client->dev, "page select - page: 0x%.02x\n", page);
+    ret = i2c_smbus_write_byte_data(client, FTS_PAGE_SELECT_REG, page);
+    if (ret < 0)
+        goto error;
+
+    reg &= 0xFF;
+    dev_dbg(&client->dev,
+            "write - reg: 0x%.02x: val: 0x%.02x\n", reg, value);
+    ret = i2c_smbus_write_byte_data(client, reg, value);
+
+error:
+    mutex_unlock(&data->access_lock);
+    return ret;
+}
+
+/*****************************************************************************/
+/* Data Updater Helper function */
+/*****************************************************************************/
+static int fts_update_device(struct fts_data *data)
+{
+    int i;
+    int err = 0;
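/*
 * The body that follows implements the standard hwmon mirroring idiom:
 * take update_lock, and only re-read the (slow, page-switched) I2C
 * registers when the cached snapshot is stale (older than 2 * HZ) or was
 * never valid. The same pattern in isolation, with a hypothetical
 * read_hw() helper standing in for the register reads:
 *
 *	mutex_lock(&lock);
 *	if (time_after(jiffies, last + 2 * HZ) || !valid) {
 *		val = read_hw();
 *		last = jiffies;
 *		valid = true;
 *	}
 *	mutex_unlock(&lock);
 */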
+ + mutex_lock(&data->update_lock); + if (!time_after(jiffies, data->last_updated + 2 * HZ) && data->valid) + goto exit; + + err = fts_read_byte(data->client, FTS_DEVICE_STATUS_REG); + if (err < 0) + goto exit; + + data->valid = !!(err & 0x02); /* Data not ready yet */ + if (unlikely(!data->valid)) { + err = -EAGAIN; + goto exit; + } + + err = fts_read_byte(data->client, FTS_FAN_PRESENT_REG); + if (err < 0) + goto exit; + data->fan_present = err; + + err = fts_read_byte(data->client, FTS_FAN_EVENT_REG); + if (err < 0) + goto exit; + data->fan_alarm = err; + + for (i = 0; i < FTS_NO_FAN_SENSORS; i++) { + if (data->fan_present & BIT(i)) { + err = fts_read_byte(data->client, FTS_REG_FAN_INPUT(i)); + if (err < 0) + goto exit; + data->fan_input[i] = err; + + err = fts_read_byte(data->client, + FTS_REG_FAN_SOURCE(i)); + if (err < 0) + goto exit; + data->fan_source[i] = err; + } else { + data->fan_input[i] = 0; + data->fan_source[i] = 0; + } + } + + err = fts_read_byte(data->client, FTS_SENSOR_EVENT_REG); + if (err < 0) + goto exit; + data->temp_alarm = err; + + for (i = 0; i < FTS_NO_TEMP_SENSORS; i++) { + err = fts_read_byte(data->client, FTS_REG_TEMP_INPUT(i)); + if (err < 0) + goto exit; + data->temp_input[i] = err; + } + + for (i = 0; i < FTS_NO_VOLT_SENSORS; i++) { + err = fts_read_byte(data->client, FTS_REG_VOLT(i)); + if (err < 0) + goto exit; + data->volt[i] = err; + } + data->last_updated = jiffies; + err = 0; +exit: + mutex_unlock(&data->update_lock); + return err; +} + +/*****************************************************************************/ +/* Watchdog functions */ +/*****************************************************************************/ +static int fts_wd_set_resolution(struct fts_data *data, + enum WATCHDOG_RESOLUTION resolution) +{ + int ret; + + if (data->resolution == resolution) + return 0; + + ret = fts_read_byte(data->client, FTS_WATCHDOG_CONTROL); + if (ret < 0) + return ret; + + if ((resolution == seconds && ret & BIT(1)) || + (resolution == minutes && (ret & BIT(1)) == 0)) { + data->resolution = resolution; + return 0; + } + + if (resolution == seconds) + set_bit(1, (unsigned long *)&ret); + else + ret &= ~BIT(1); + + ret = fts_write_byte(data->client, FTS_WATCHDOG_CONTROL, ret); + if (ret < 0) + return ret; + + data->resolution = resolution; + return ret; +} + +static int fts_wd_set_timeout(struct watchdog_device *wdd, unsigned int timeout) +{ + struct fts_data *data; + enum WATCHDOG_RESOLUTION resolution = seconds; + int ret; + + data = watchdog_get_drvdata(wdd); + /* switch watchdog resolution to minutes if timeout does not fit + * into a byte + */ + if (timeout > 0xFF) { + timeout = DIV_ROUND_UP(timeout, 60) * 60; + resolution = minutes; + } + + ret = fts_wd_set_resolution(data, resolution); + if (ret < 0) + return ret; + + wdd->timeout = timeout; + return 0; +} + +static int fts_wd_start(struct watchdog_device *wdd) +{ + struct fts_data *data = watchdog_get_drvdata(wdd); + + return fts_write_byte(data->client, FTS_WATCHDOG_TIME_PRESET, + wdd->timeout / (u8)data->resolution); +} + +static int fts_wd_stop(struct watchdog_device *wdd) +{ + struct fts_data *data; + + data = watchdog_get_drvdata(wdd); + return fts_write_byte(data->client, FTS_WATCHDOG_TIME_PRESET, 0); +} + +static const struct watchdog_info fts_wd_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE, + .identity = "FTS Teutates Hardware Watchdog", +}; + +static const struct watchdog_ops fts_wd_ops = { + .owner = THIS_MODULE, + .start = fts_wd_start, + .stop = 
fts_wd_stop,
+	.set_timeout = fts_wd_set_timeout,
+};
+
+static int fts_watchdog_init(struct fts_data *data)
+{
+	int timeout, ret;
+
+	watchdog_set_drvdata(&data->wdd, data);
+
+	timeout = fts_read_byte(data->client, FTS_WATCHDOG_TIME_PRESET);
+	if (timeout < 0)
+		return timeout;
+
+	/* watchdog not running, set timeout to a default of 60 sec. */
+	if (timeout == 0) {
+		ret = fts_wd_set_resolution(data, seconds);
+		if (ret < 0)
+			return ret;
+		data->wdd.timeout = 60;
+	} else {
+		ret = fts_read_byte(data->client, FTS_WATCHDOG_CONTROL);
+		if (ret < 0)
+			return ret;
+
+		data->resolution = ret & BIT(1) ? seconds : minutes;
+		data->wdd.timeout = timeout * (u8)data->resolution;
+		set_bit(WDOG_HW_RUNNING, &data->wdd.status);
+	}
+
+	/* Register our watchdog part */
+	data->wdd.info = &fts_wd_info;
+	data->wdd.ops = &fts_wd_ops;
+	data->wdd.parent = &data->client->dev;
+	data->wdd.min_timeout = 1;
+
+	/* max timeout 255 minutes. */
+	data->wdd.max_hw_heartbeat_ms = 0xFF * 60 * MSEC_PER_SEC;
+
+	return watchdog_register_device(&data->wdd);
+}
+
+/*****************************************************************************/
+/* SysFS handler functions */
+/*****************************************************************************/
+static ssize_t show_in_value(struct device *dev,
+			     struct device_attribute *devattr, char *buf)
+{
+	struct fts_data *data = dev_get_drvdata(dev);
+	int index = to_sensor_dev_attr(devattr)->index;
+	int err;
+
+	err = fts_update_device(data);
+	if (err < 0)
+		return err;
+
+	return sprintf(buf, "%u\n", data->volt[index]);
+}
+
+static ssize_t show_temp_value(struct device *dev,
+			       struct device_attribute *devattr, char *buf)
+{
+	struct fts_data *data = dev_get_drvdata(dev);
+	int index = to_sensor_dev_attr(devattr)->index;
+	int err;
+
+	err = fts_update_device(data);
+	if (err < 0)
+		return err;
+
+	return sprintf(buf, "%u\n", data->temp_input[index]);
+}
+
+static ssize_t show_temp_fault(struct device *dev,
+			       struct device_attribute *devattr, char *buf)
+{
+	struct fts_data *data = dev_get_drvdata(dev);
+	int index = to_sensor_dev_attr(devattr)->index;
+	int err;
+
+	err = fts_update_device(data);
+	if (err < 0)
+		return err;
+
+	/* 00h Temperature = Sensor Error */
+	return sprintf(buf, "%d\n", data->temp_input[index] == 0);
+}
+
+static ssize_t show_temp_alarm(struct device *dev,
+			       struct device_attribute *devattr, char *buf)
+{
+	struct fts_data *data = dev_get_drvdata(dev);
+	int index = to_sensor_dev_attr(devattr)->index;
+	int err;
+
+	err = fts_update_device(data);
+	if (err < 0)
+		return err;
+
+	return sprintf(buf, "%u\n", !!(data->temp_alarm & BIT(index)));
+}
+
+static ssize_t
+clear_temp_alarm(struct device *dev, struct device_attribute *devattr,
+		 const char *buf, size_t count)
+{
+	struct fts_data *data = dev_get_drvdata(dev);
+	int index = to_sensor_dev_attr(devattr)->index;
+	unsigned long val;
+	long ret;
+
+	ret = fts_update_device(data);
+	if (ret < 0)
+		return ret;
+
+	if (kstrtoul(buf, 10, &val) || val != 0)
+		return -EINVAL;
+
+	mutex_lock(&data->update_lock);
+	ret = fts_read_byte(data->client, FTS_REG_TEMP_CONTROL(index));
+	if (ret < 0)
+		goto error;
+
+	ret = fts_write_byte(data->client, FTS_REG_TEMP_CONTROL(index),
+			     ret | 0x1);
+	if (ret < 0)
+		goto error;
+
+	data->valid = false;
+	ret = count;
+error:
+	mutex_unlock(&data->update_lock);
+	return ret;
+}
+
+static ssize_t show_fan_value(struct device *dev,
+			      struct device_attribute *devattr, char *buf)
+{
+	struct fts_data *data = dev_get_drvdata(dev);
+	int index = to_sensor_dev_attr(devattr)->index;
+	int err;
+
+	err = fts_update_device(data);
+	if (err < 0)
+		return err;
+
+	return sprintf(buf, "%u\n", data->fan_input[index]);
+}
+
+static ssize_t show_fan_source(struct device *dev,
+			       struct device_attribute *devattr, char *buf)
+{
+	struct fts_data *data = dev_get_drvdata(dev);
+	int index = to_sensor_dev_attr(devattr)->index;
+	int err;
+
+	err = fts_update_device(data);
+	if (err < 0)
+		return err;
+
+	return sprintf(buf, "%u\n", data->fan_source[index]);
+}
+
+static ssize_t show_fan_alarm(struct device *dev,
+			      struct device_attribute *devattr, char *buf)
+{
+	struct fts_data *data = dev_get_drvdata(dev);
+	int index = to_sensor_dev_attr(devattr)->index;
+	int err;
+
+	err = fts_update_device(data);
+	if (err < 0)
+		return err;
+
+	return sprintf(buf, "%d\n", !!(data->fan_alarm & BIT(index)));
+}
+
+static ssize_t
+clear_fan_alarm(struct device *dev, struct device_attribute *devattr,
+		const char *buf, size_t count)
+{
+	struct fts_data *data = dev_get_drvdata(dev);
+	int index = to_sensor_dev_attr(devattr)->index;
+	unsigned long val;
+	long ret;
+
+	ret = fts_update_device(data);
+	if (ret < 0)
+		return ret;
+
+	if (kstrtoul(buf, 10, &val) || val != 0)
+		return -EINVAL;
+
+	mutex_lock(&data->update_lock);
+	ret = fts_read_byte(data->client, FTS_REG_FAN_CONTROL(index));
+	if (ret < 0)
+		goto error;
+
+	ret = fts_write_byte(data->client, FTS_REG_FAN_CONTROL(index),
+			     ret | 0x1);
+	if (ret < 0)
+		goto error;
+
+	data->valid = false;
+	ret = count;
+error:
+	mutex_unlock(&data->update_lock);
+	return ret;
+}
+
+/*****************************************************************************/
+/* SysFS structs */
+/*****************************************************************************/
+
+/* Temperature sensors */
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp_value, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, show_temp_value, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, show_temp_value, NULL, 2);
+static SENSOR_DEVICE_ATTR(temp4_input, S_IRUGO, show_temp_value, NULL, 3);
+static SENSOR_DEVICE_ATTR(temp5_input, S_IRUGO, show_temp_value, NULL, 4);
+static SENSOR_DEVICE_ATTR(temp6_input, S_IRUGO, show_temp_value, NULL, 5);
+static SENSOR_DEVICE_ATTR(temp7_input, S_IRUGO, show_temp_value, NULL, 6);
+static SENSOR_DEVICE_ATTR(temp8_input, S_IRUGO, show_temp_value, NULL, 7);
+static SENSOR_DEVICE_ATTR(temp9_input, S_IRUGO, show_temp_value, NULL, 8);
+static SENSOR_DEVICE_ATTR(temp10_input, S_IRUGO, show_temp_value, NULL, 9);
+static SENSOR_DEVICE_ATTR(temp11_input, S_IRUGO, show_temp_value, NULL, 10);
+static SENSOR_DEVICE_ATTR(temp12_input, S_IRUGO, show_temp_value, NULL, 11);
+static SENSOR_DEVICE_ATTR(temp13_input, S_IRUGO, show_temp_value, NULL, 12);
+static SENSOR_DEVICE_ATTR(temp14_input, S_IRUGO, show_temp_value, NULL, 13);
+static SENSOR_DEVICE_ATTR(temp15_input, S_IRUGO, show_temp_value, NULL, 14);
+static SENSOR_DEVICE_ATTR(temp16_input, S_IRUGO, show_temp_value, NULL, 15);
+
+static SENSOR_DEVICE_ATTR(temp1_fault, S_IRUGO, show_temp_fault, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp2_fault, S_IRUGO, show_temp_fault, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_fault, S_IRUGO, show_temp_fault, NULL, 2);
+static SENSOR_DEVICE_ATTR(temp4_fault, S_IRUGO, show_temp_fault, NULL, 3);
+static SENSOR_DEVICE_ATTR(temp5_fault, S_IRUGO, show_temp_fault, NULL, 4);
+static SENSOR_DEVICE_ATTR(temp6_fault, S_IRUGO, show_temp_fault, NULL, 5);
+static SENSOR_DEVICE_ATTR(temp7_fault, S_IRUGO, show_temp_fault, NULL, 6);
+static SENSOR_DEVICE_ATTR(temp8_fault, S_IRUGO, show_temp_fault, NULL, 7);
+static
SENSOR_DEVICE_ATTR(temp9_fault, S_IRUGO, show_temp_fault, NULL, 8); +static SENSOR_DEVICE_ATTR(temp10_fault, S_IRUGO, show_temp_fault, NULL, 9); +static SENSOR_DEVICE_ATTR(temp11_fault, S_IRUGO, show_temp_fault, NULL, 10); +static SENSOR_DEVICE_ATTR(temp12_fault, S_IRUGO, show_temp_fault, NULL, 11); +static SENSOR_DEVICE_ATTR(temp13_fault, S_IRUGO, show_temp_fault, NULL, 12); +static SENSOR_DEVICE_ATTR(temp14_fault, S_IRUGO, show_temp_fault, NULL, 13); +static SENSOR_DEVICE_ATTR(temp15_fault, S_IRUGO, show_temp_fault, NULL, 14); +static SENSOR_DEVICE_ATTR(temp16_fault, S_IRUGO, show_temp_fault, NULL, 15); + +static SENSOR_DEVICE_ATTR(temp1_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 0); +static SENSOR_DEVICE_ATTR(temp2_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 1); +static SENSOR_DEVICE_ATTR(temp3_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 2); +static SENSOR_DEVICE_ATTR(temp4_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 3); +static SENSOR_DEVICE_ATTR(temp5_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 4); +static SENSOR_DEVICE_ATTR(temp6_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 5); +static SENSOR_DEVICE_ATTR(temp7_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 6); +static SENSOR_DEVICE_ATTR(temp8_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 7); +static SENSOR_DEVICE_ATTR(temp9_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 8); +static SENSOR_DEVICE_ATTR(temp10_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 9); +static SENSOR_DEVICE_ATTR(temp11_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 10); +static SENSOR_DEVICE_ATTR(temp12_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 11); +static SENSOR_DEVICE_ATTR(temp13_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 12); +static SENSOR_DEVICE_ATTR(temp14_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 13); +static SENSOR_DEVICE_ATTR(temp15_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 14); +static SENSOR_DEVICE_ATTR(temp16_alarm, S_IRUGO | S_IWUSR, show_temp_alarm, + clear_temp_alarm, 15); + +static struct attribute *fts_temp_attrs[] = { + &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_temp2_input.dev_attr.attr, + &sensor_dev_attr_temp3_input.dev_attr.attr, + &sensor_dev_attr_temp4_input.dev_attr.attr, + &sensor_dev_attr_temp5_input.dev_attr.attr, + &sensor_dev_attr_temp6_input.dev_attr.attr, + &sensor_dev_attr_temp7_input.dev_attr.attr, + &sensor_dev_attr_temp8_input.dev_attr.attr, + &sensor_dev_attr_temp9_input.dev_attr.attr, + &sensor_dev_attr_temp10_input.dev_attr.attr, + &sensor_dev_attr_temp11_input.dev_attr.attr, + &sensor_dev_attr_temp12_input.dev_attr.attr, + &sensor_dev_attr_temp13_input.dev_attr.attr, + &sensor_dev_attr_temp14_input.dev_attr.attr, + &sensor_dev_attr_temp15_input.dev_attr.attr, + &sensor_dev_attr_temp16_input.dev_attr.attr, + + &sensor_dev_attr_temp1_fault.dev_attr.attr, + &sensor_dev_attr_temp2_fault.dev_attr.attr, + &sensor_dev_attr_temp3_fault.dev_attr.attr, + &sensor_dev_attr_temp4_fault.dev_attr.attr, + &sensor_dev_attr_temp5_fault.dev_attr.attr, + &sensor_dev_attr_temp6_fault.dev_attr.attr, + &sensor_dev_attr_temp7_fault.dev_attr.attr, + &sensor_dev_attr_temp8_fault.dev_attr.attr, + &sensor_dev_attr_temp9_fault.dev_attr.attr, + &sensor_dev_attr_temp10_fault.dev_attr.attr, + &sensor_dev_attr_temp11_fault.dev_attr.attr, + 
&sensor_dev_attr_temp12_fault.dev_attr.attr, + &sensor_dev_attr_temp13_fault.dev_attr.attr, + &sensor_dev_attr_temp14_fault.dev_attr.attr, + &sensor_dev_attr_temp15_fault.dev_attr.attr, + &sensor_dev_attr_temp16_fault.dev_attr.attr, + + &sensor_dev_attr_temp1_alarm.dev_attr.attr, + &sensor_dev_attr_temp2_alarm.dev_attr.attr, + &sensor_dev_attr_temp3_alarm.dev_attr.attr, + &sensor_dev_attr_temp4_alarm.dev_attr.attr, + &sensor_dev_attr_temp5_alarm.dev_attr.attr, + &sensor_dev_attr_temp6_alarm.dev_attr.attr, + &sensor_dev_attr_temp7_alarm.dev_attr.attr, + &sensor_dev_attr_temp8_alarm.dev_attr.attr, + &sensor_dev_attr_temp9_alarm.dev_attr.attr, + &sensor_dev_attr_temp10_alarm.dev_attr.attr, + &sensor_dev_attr_temp11_alarm.dev_attr.attr, + &sensor_dev_attr_temp12_alarm.dev_attr.attr, + &sensor_dev_attr_temp13_alarm.dev_attr.attr, + &sensor_dev_attr_temp14_alarm.dev_attr.attr, + &sensor_dev_attr_temp15_alarm.dev_attr.attr, + &sensor_dev_attr_temp16_alarm.dev_attr.attr, + NULL +}; + +/* Fans */ +static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, show_fan_value, NULL, 0); +static SENSOR_DEVICE_ATTR(fan2_input, S_IRUGO, show_fan_value, NULL, 1); +static SENSOR_DEVICE_ATTR(fan3_input, S_IRUGO, show_fan_value, NULL, 2); +static SENSOR_DEVICE_ATTR(fan4_input, S_IRUGO, show_fan_value, NULL, 3); +static SENSOR_DEVICE_ATTR(fan5_input, S_IRUGO, show_fan_value, NULL, 4); +static SENSOR_DEVICE_ATTR(fan6_input, S_IRUGO, show_fan_value, NULL, 5); +static SENSOR_DEVICE_ATTR(fan7_input, S_IRUGO, show_fan_value, NULL, 6); +static SENSOR_DEVICE_ATTR(fan8_input, S_IRUGO, show_fan_value, NULL, 7); + +static SENSOR_DEVICE_ATTR(fan1_source, S_IRUGO, show_fan_source, NULL, 0); +static SENSOR_DEVICE_ATTR(fan2_source, S_IRUGO, show_fan_source, NULL, 1); +static SENSOR_DEVICE_ATTR(fan3_source, S_IRUGO, show_fan_source, NULL, 2); +static SENSOR_DEVICE_ATTR(fan4_source, S_IRUGO, show_fan_source, NULL, 3); +static SENSOR_DEVICE_ATTR(fan5_source, S_IRUGO, show_fan_source, NULL, 4); +static SENSOR_DEVICE_ATTR(fan6_source, S_IRUGO, show_fan_source, NULL, 5); +static SENSOR_DEVICE_ATTR(fan7_source, S_IRUGO, show_fan_source, NULL, 6); +static SENSOR_DEVICE_ATTR(fan8_source, S_IRUGO, show_fan_source, NULL, 7); + +static SENSOR_DEVICE_ATTR(fan1_alarm, S_IRUGO | S_IWUSR, + show_fan_alarm, clear_fan_alarm, 0); +static SENSOR_DEVICE_ATTR(fan2_alarm, S_IRUGO | S_IWUSR, + show_fan_alarm, clear_fan_alarm, 1); +static SENSOR_DEVICE_ATTR(fan3_alarm, S_IRUGO | S_IWUSR, + show_fan_alarm, clear_fan_alarm, 2); +static SENSOR_DEVICE_ATTR(fan4_alarm, S_IRUGO | S_IWUSR, + show_fan_alarm, clear_fan_alarm, 3); +static SENSOR_DEVICE_ATTR(fan5_alarm, S_IRUGO | S_IWUSR, + show_fan_alarm, clear_fan_alarm, 4); +static SENSOR_DEVICE_ATTR(fan6_alarm, S_IRUGO | S_IWUSR, + show_fan_alarm, clear_fan_alarm, 5); +static SENSOR_DEVICE_ATTR(fan7_alarm, S_IRUGO | S_IWUSR, + show_fan_alarm, clear_fan_alarm, 6); +static SENSOR_DEVICE_ATTR(fan8_alarm, S_IRUGO | S_IWUSR, + show_fan_alarm, clear_fan_alarm, 7); + +static struct attribute *fts_fan_attrs[] = { + &sensor_dev_attr_fan1_input.dev_attr.attr, + &sensor_dev_attr_fan2_input.dev_attr.attr, + &sensor_dev_attr_fan3_input.dev_attr.attr, + &sensor_dev_attr_fan4_input.dev_attr.attr, + &sensor_dev_attr_fan5_input.dev_attr.attr, + &sensor_dev_attr_fan6_input.dev_attr.attr, + &sensor_dev_attr_fan7_input.dev_attr.attr, + &sensor_dev_attr_fan8_input.dev_attr.attr, + + &sensor_dev_attr_fan1_source.dev_attr.attr, + &sensor_dev_attr_fan2_source.dev_attr.attr, + &sensor_dev_attr_fan3_source.dev_attr.attr, + 
&sensor_dev_attr_fan4_source.dev_attr.attr, + &sensor_dev_attr_fan5_source.dev_attr.attr, + &sensor_dev_attr_fan6_source.dev_attr.attr, + &sensor_dev_attr_fan7_source.dev_attr.attr, + &sensor_dev_attr_fan8_source.dev_attr.attr, + + &sensor_dev_attr_fan1_alarm.dev_attr.attr, + &sensor_dev_attr_fan2_alarm.dev_attr.attr, + &sensor_dev_attr_fan3_alarm.dev_attr.attr, + &sensor_dev_attr_fan4_alarm.dev_attr.attr, + &sensor_dev_attr_fan5_alarm.dev_attr.attr, + &sensor_dev_attr_fan6_alarm.dev_attr.attr, + &sensor_dev_attr_fan7_alarm.dev_attr.attr, + &sensor_dev_attr_fan8_alarm.dev_attr.attr, + NULL +}; + +/* Voltages */ +static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, show_in_value, NULL, 0); +static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, show_in_value, NULL, 1); +static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, show_in_value, NULL, 2); +static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, show_in_value, NULL, 3); +static struct attribute *fts_voltage_attrs[] = { + &sensor_dev_attr_in1_input.dev_attr.attr, + &sensor_dev_attr_in2_input.dev_attr.attr, + &sensor_dev_attr_in3_input.dev_attr.attr, + &sensor_dev_attr_in4_input.dev_attr.attr, + NULL +}; + +static const struct attribute_group fts_voltage_attr_group = { + .attrs = fts_voltage_attrs +}; + +static const struct attribute_group fts_temp_attr_group = { + .attrs = fts_temp_attrs +}; + +static const struct attribute_group fts_fan_attr_group = { + .attrs = fts_fan_attrs +}; + +static const struct attribute_group *fts_attr_groups[] = { + &fts_voltage_attr_group, + &fts_temp_attr_group, + &fts_fan_attr_group, + NULL +}; + +/*****************************************************************************/ +/* Module initialization / remove functions */ +/*****************************************************************************/ +static int fts_remove(struct i2c_client *client) +{ + struct fts_data *data = dev_get_drvdata(&client->dev); + + watchdog_unregister_device(&data->wdd); + return 0; +} + +static int fts_probe(struct i2c_client *client, const struct i2c_device_id *id) +{ + u8 revision; + struct fts_data *data; + int err; + s8 deviceid; + struct device *hwmon_dev; + + if (client->addr != 0x73) + return -ENODEV; + + /* Baseboard Management Controller check */ + deviceid = i2c_smbus_read_byte_data(client, FTS_DEVICE_ID_REG); + if (deviceid > 0 && (deviceid & 0xF0) == 0x10) { + switch (deviceid & 0x0F) { + case 0x01: + break; + default: + dev_dbg(&client->dev, + "No Baseboard Management Controller\n"); + return -ENODEV; + } + } else { + dev_dbg(&client->dev, "No fujitsu board\n"); + return -ENODEV; + } + + data = devm_kzalloc(&client->dev, sizeof(struct fts_data), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + mutex_init(&data->update_lock); + mutex_init(&data->access_lock); + data->client = client; + dev_set_drvdata(&client->dev, data); + + err = i2c_smbus_read_byte_data(client, FTS_DEVICE_REVISION_REG); + if (err < 0) + return err; + revision = err; + + hwmon_dev = devm_hwmon_device_register_with_groups(&client->dev, + "ftsteutates", + data, + fts_attr_groups); + if (IS_ERR(hwmon_dev)) + return PTR_ERR(hwmon_dev); + + err = fts_watchdog_init(data); + if (err) + return err; + + dev_info(&client->dev, "Detected FTS Teutates chip, revision: %d.%d\n", + (revision & 0xF0) >> 4, revision & 0x0F); + return 0; +} + +/*****************************************************************************/ +/* Module Details */ +/*****************************************************************************/ +static struct i2c_driver fts_driver = { + .driver = { + .name = 
"ftsteutates", + }, + .id_table = fts_id, + .probe = fts_probe, + .remove = fts_remove, +}; + +module_i2c_driver(fts_driver); + +MODULE_AUTHOR("Thilo Cestonaro "); +MODULE_DESCRIPTION("FTS Teutates driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c new file mode 100644 index 000000000000..e6b49500c52a --- /dev/null +++ b/drivers/hwmon/ina3221.c @@ -0,0 +1,445 @@ +/* + * INA3221 Triple Current/Voltage Monitor + * + * Copyright (C) 2016 Texas Instruments Incorporated - http://www.ti.com/ + * Andrew F. Davis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include + +#define INA3221_DRIVER_NAME "ina3221" + +#define INA3221_CONFIG 0x00 +#define INA3221_SHUNT1 0x01 +#define INA3221_BUS1 0x02 +#define INA3221_SHUNT2 0x03 +#define INA3221_BUS2 0x04 +#define INA3221_SHUNT3 0x05 +#define INA3221_BUS3 0x06 +#define INA3221_CRIT1 0x07 +#define INA3221_WARN1 0x08 +#define INA3221_CRIT2 0x09 +#define INA3221_WARN2 0x0a +#define INA3221_CRIT3 0x0b +#define INA3221_WARN3 0x0c +#define INA3221_MASK_ENABLE 0x0f + +#define INA3221_CONFIG_MODE_SHUNT BIT(1) +#define INA3221_CONFIG_MODE_BUS BIT(2) +#define INA3221_CONFIG_MODE_CONTINUOUS BIT(3) + +#define INA3221_RSHUNT_DEFAULT 10000 + +enum ina3221_fields { + /* Configuration */ + F_RST, + + /* Alert Flags */ + F_WF3, F_WF2, F_WF1, + F_CF3, F_CF2, F_CF1, + + /* sentinel */ + F_MAX_FIELDS +}; + +static const struct reg_field ina3221_reg_fields[] = { + [F_RST] = REG_FIELD(INA3221_CONFIG, 15, 15), + + [F_WF3] = REG_FIELD(INA3221_MASK_ENABLE, 3, 3), + [F_WF2] = REG_FIELD(INA3221_MASK_ENABLE, 4, 4), + [F_WF1] = REG_FIELD(INA3221_MASK_ENABLE, 5, 5), + [F_CF3] = REG_FIELD(INA3221_MASK_ENABLE, 7, 7), + [F_CF2] = REG_FIELD(INA3221_MASK_ENABLE, 8, 8), + [F_CF1] = REG_FIELD(INA3221_MASK_ENABLE, 9, 9), +}; + +enum ina3221_channels { + INA3221_CHANNEL1, + INA3221_CHANNEL2, + INA3221_CHANNEL3, + INA3221_NUM_CHANNELS +}; + +static const unsigned int register_channel[] = { + [INA3221_SHUNT1] = INA3221_CHANNEL1, + [INA3221_SHUNT2] = INA3221_CHANNEL2, + [INA3221_SHUNT3] = INA3221_CHANNEL3, + [INA3221_CRIT1] = INA3221_CHANNEL1, + [INA3221_CRIT2] = INA3221_CHANNEL2, + [INA3221_CRIT3] = INA3221_CHANNEL3, + [INA3221_WARN1] = INA3221_CHANNEL1, + [INA3221_WARN2] = INA3221_CHANNEL2, + [INA3221_WARN3] = INA3221_CHANNEL3, +}; + +/** + * struct ina3221_data - device specific information + * @regmap: Register map of the device + * @fields: Register fields of the device + * @shunt_resistors: Array of resistor values per channel + */ +struct ina3221_data { + struct regmap *regmap; + struct regmap_field *fields[F_MAX_FIELDS]; + int shunt_resistors[INA3221_NUM_CHANNELS]; +}; + +static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg, + int *val) +{ + unsigned int regval; + int ret; + + ret = regmap_read(ina->regmap, reg, ®val); + if (ret) + return ret; + + *val = sign_extend32(regval >> 3, 12); + + return 0; +} + +static ssize_t ina3221_show_bus_voltage(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sd_attr = 
to_sensor_dev_attr(attr); + struct ina3221_data *ina = dev_get_drvdata(dev); + unsigned int reg = sd_attr->index; + int val, voltage_mv, ret; + + ret = ina3221_read_value(ina, reg, &val); + if (ret) + return ret; + + voltage_mv = val * 8; + + return snprintf(buf, PAGE_SIZE, "%d\n", voltage_mv); +} + +static ssize_t ina3221_show_shunt_voltage(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sd_attr = to_sensor_dev_attr(attr); + struct ina3221_data *ina = dev_get_drvdata(dev); + unsigned int reg = sd_attr->index; + int val, voltage_uv, ret; + + ret = ina3221_read_value(ina, reg, &val); + if (ret) + return ret; + voltage_uv = val * 40; + + return snprintf(buf, PAGE_SIZE, "%d\n", voltage_uv); +} + +static ssize_t ina3221_show_current(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct sensor_device_attribute *sd_attr = to_sensor_dev_attr(attr); + struct ina3221_data *ina = dev_get_drvdata(dev); + unsigned int reg = sd_attr->index; + unsigned int channel = register_channel[reg]; + int resistance_uo = ina->shunt_resistors[channel]; + int val, current_ma, voltage_nv, ret; + + ret = ina3221_read_value(ina, reg, &val); + if (ret) + return ret; + voltage_nv = val * 40000; + + current_ma = DIV_ROUND_CLOSEST(voltage_nv, resistance_uo); + + return snprintf(buf, PAGE_SIZE, "%d\n", current_ma); +} + +static ssize_t ina3221_set_current(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute *sd_attr = to_sensor_dev_attr(attr); + struct ina3221_data *ina = dev_get_drvdata(dev); + unsigned int reg = sd_attr->index; + unsigned int channel = register_channel[reg]; + int resistance_uo = ina->shunt_resistors[channel]; + int val, current_ma, voltage_uv, ret; + + ret = kstrtoint(buf, 0, ¤t_ma); + if (ret) + return ret; + + /* clamp current */ + current_ma = clamp_val(current_ma, + INT_MIN / resistance_uo, + INT_MAX / resistance_uo); + + voltage_uv = DIV_ROUND_CLOSEST(current_ma * resistance_uo, 1000); + + /* clamp voltage */ + voltage_uv = clamp_val(voltage_uv, -163800, 163800); + + /* 1 / 40uV(scale) << 3(register shift) = 5 */ + val = DIV_ROUND_CLOSEST(voltage_uv, 5) & 0xfff8; + + ret = regmap_write(ina->regmap, reg, val); + if (ret) + return ret; + + return count; +} + +static ssize_t ina3221_show_shunt(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct sensor_device_attribute *sd_attr = to_sensor_dev_attr(attr); + struct ina3221_data *ina = dev_get_drvdata(dev); + unsigned int channel = sd_attr->index; + unsigned int resistance_uo; + + resistance_uo = ina->shunt_resistors[channel]; + + return snprintf(buf, PAGE_SIZE, "%d\n", resistance_uo); +} + +static ssize_t ina3221_set_shunt(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute *sd_attr = to_sensor_dev_attr(attr); + struct ina3221_data *ina = dev_get_drvdata(dev); + unsigned int channel = sd_attr->index; + int val; + int ret; + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + val = clamp_val(val, 1, INT_MAX); + + ina->shunt_resistors[channel] = val; + + return count; +} + +static ssize_t ina3221_show_alert(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct sensor_device_attribute *sd_attr = to_sensor_dev_attr(attr); + struct ina3221_data *ina = dev_get_drvdata(dev); + unsigned int field = sd_attr->index; + unsigned int regval; + int ret; + + ret = regmap_field_read(ina->fields[field], 
®val); + if (ret) + return ret; + + return snprintf(buf, PAGE_SIZE, "%d\n", regval); +} + +/* bus voltage */ +static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, + ina3221_show_bus_voltage, NULL, INA3221_BUS1); +static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, + ina3221_show_bus_voltage, NULL, INA3221_BUS2); +static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, + ina3221_show_bus_voltage, NULL, INA3221_BUS3); + +/* calculated current */ +static SENSOR_DEVICE_ATTR(curr1_input, S_IRUGO, + ina3221_show_current, NULL, INA3221_SHUNT1); +static SENSOR_DEVICE_ATTR(curr2_input, S_IRUGO, + ina3221_show_current, NULL, INA3221_SHUNT2); +static SENSOR_DEVICE_ATTR(curr3_input, S_IRUGO, + ina3221_show_current, NULL, INA3221_SHUNT3); + +/* shunt resistance */ +static SENSOR_DEVICE_ATTR(shunt1_resistor, S_IRUGO | S_IWUSR, + ina3221_show_shunt, ina3221_set_shunt, INA3221_CHANNEL1); +static SENSOR_DEVICE_ATTR(shunt2_resistor, S_IRUGO | S_IWUSR, + ina3221_show_shunt, ina3221_set_shunt, INA3221_CHANNEL2); +static SENSOR_DEVICE_ATTR(shunt3_resistor, S_IRUGO | S_IWUSR, + ina3221_show_shunt, ina3221_set_shunt, INA3221_CHANNEL3); + +/* critical current */ +static SENSOR_DEVICE_ATTR(curr1_crit, S_IRUGO | S_IWUSR, + ina3221_show_current, ina3221_set_current, INA3221_CRIT1); +static SENSOR_DEVICE_ATTR(curr2_crit, S_IRUGO | S_IWUSR, + ina3221_show_current, ina3221_set_current, INA3221_CRIT2); +static SENSOR_DEVICE_ATTR(curr3_crit, S_IRUGO | S_IWUSR, + ina3221_show_current, ina3221_set_current, INA3221_CRIT3); + +/* critical current alert */ +static SENSOR_DEVICE_ATTR(curr1_crit_alarm, S_IRUGO, + ina3221_show_alert, NULL, F_CF1); +static SENSOR_DEVICE_ATTR(curr2_crit_alarm, S_IRUGO, + ina3221_show_alert, NULL, F_CF2); +static SENSOR_DEVICE_ATTR(curr3_crit_alarm, S_IRUGO, + ina3221_show_alert, NULL, F_CF3); + +/* warning current */ +static SENSOR_DEVICE_ATTR(curr1_max, S_IRUGO | S_IWUSR, + ina3221_show_current, ina3221_set_current, INA3221_WARN1); +static SENSOR_DEVICE_ATTR(curr2_max, S_IRUGO | S_IWUSR, + ina3221_show_current, ina3221_set_current, INA3221_WARN2); +static SENSOR_DEVICE_ATTR(curr3_max, S_IRUGO | S_IWUSR, + ina3221_show_current, ina3221_set_current, INA3221_WARN3); + +/* warning current alert */ +static SENSOR_DEVICE_ATTR(curr1_max_alarm, S_IRUGO, + ina3221_show_alert, NULL, F_WF1); +static SENSOR_DEVICE_ATTR(curr2_max_alarm, S_IRUGO, + ina3221_show_alert, NULL, F_WF2); +static SENSOR_DEVICE_ATTR(curr3_max_alarm, S_IRUGO, + ina3221_show_alert, NULL, F_WF3); + +/* shunt voltage */ +static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, + ina3221_show_shunt_voltage, NULL, INA3221_SHUNT1); +static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, + ina3221_show_shunt_voltage, NULL, INA3221_SHUNT2); +static SENSOR_DEVICE_ATTR(in6_input, S_IRUGO, + ina3221_show_shunt_voltage, NULL, INA3221_SHUNT3); + +static struct attribute *ina3221_attrs[] = { + /* channel 1 */ + &sensor_dev_attr_in1_input.dev_attr.attr, + &sensor_dev_attr_curr1_input.dev_attr.attr, + &sensor_dev_attr_shunt1_resistor.dev_attr.attr, + &sensor_dev_attr_curr1_crit.dev_attr.attr, + &sensor_dev_attr_curr1_crit_alarm.dev_attr.attr, + &sensor_dev_attr_curr1_max.dev_attr.attr, + &sensor_dev_attr_curr1_max_alarm.dev_attr.attr, + &sensor_dev_attr_in4_input.dev_attr.attr, + + /* channel 2 */ + &sensor_dev_attr_in2_input.dev_attr.attr, + &sensor_dev_attr_curr2_input.dev_attr.attr, + &sensor_dev_attr_shunt2_resistor.dev_attr.attr, + &sensor_dev_attr_curr2_crit.dev_attr.attr, + &sensor_dev_attr_curr2_crit_alarm.dev_attr.attr, + &sensor_dev_attr_curr2_max.dev_attr.attr, + 
&sensor_dev_attr_curr2_max_alarm.dev_attr.attr, + &sensor_dev_attr_in5_input.dev_attr.attr, + + /* channel 3 */ + &sensor_dev_attr_in3_input.dev_attr.attr, + &sensor_dev_attr_curr3_input.dev_attr.attr, + &sensor_dev_attr_shunt3_resistor.dev_attr.attr, + &sensor_dev_attr_curr3_crit.dev_attr.attr, + &sensor_dev_attr_curr3_crit_alarm.dev_attr.attr, + &sensor_dev_attr_curr3_max.dev_attr.attr, + &sensor_dev_attr_curr3_max_alarm.dev_attr.attr, + &sensor_dev_attr_in6_input.dev_attr.attr, + + NULL, +}; +ATTRIBUTE_GROUPS(ina3221); + +static const struct regmap_range ina3221_yes_ranges[] = { + regmap_reg_range(INA3221_SHUNT1, INA3221_BUS3), + regmap_reg_range(INA3221_MASK_ENABLE, INA3221_MASK_ENABLE), +}; + +static const struct regmap_access_table ina3221_volatile_table = { + .yes_ranges = ina3221_yes_ranges, + .n_yes_ranges = ARRAY_SIZE(ina3221_yes_ranges), +}; + +static const struct regmap_config ina3221_regmap_config = { + .reg_bits = 8, + .val_bits = 16, + + .cache_type = REGCACHE_RBTREE, + .volatile_table = &ina3221_volatile_table, +}; + +static int ina3221_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device *dev = &client->dev; + struct ina3221_data *ina; + struct device *hwmon_dev; + int i, ret; + + ina = devm_kzalloc(dev, sizeof(*ina), GFP_KERNEL); + if (!ina) + return -ENOMEM; + + ina->regmap = devm_regmap_init_i2c(client, &ina3221_regmap_config); + if (IS_ERR(ina->regmap)) { + dev_err(dev, "Unable to allocate register map\n"); + return PTR_ERR(ina->regmap); + } + + for (i = 0; i < F_MAX_FIELDS; i++) { + ina->fields[i] = devm_regmap_field_alloc(dev, + ina->regmap, + ina3221_reg_fields[i]); + if (IS_ERR(ina->fields[i])) { + dev_err(dev, "Unable to allocate regmap fields\n"); + return PTR_ERR(ina->fields[i]); + } + } + + for (i = 0; i < INA3221_NUM_CHANNELS; i++) + ina->shunt_resistors[i] = INA3221_RSHUNT_DEFAULT; + + ret = regmap_field_write(ina->fields[F_RST], true); + if (ret) { + dev_err(dev, "Unable to reset device\n"); + return ret; + } + + hwmon_dev = devm_hwmon_device_register_with_groups(dev, + client->name, + ina, ina3221_groups); + if (IS_ERR(hwmon_dev)) { + dev_err(dev, "Unable to register hwmon device\n"); + return PTR_ERR(hwmon_dev); + } + + return 0; +} + +static const struct of_device_id ina3221_of_match_table[] = { + { .compatible = "ti,ina3221", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, ina3221_of_match_table); + +static const struct i2c_device_id ina3221_ids[] = { + { "ina3221", 0 }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(i2c, ina3221_ids); + +static struct i2c_driver ina3221_i2c_driver = { + .probe = ina3221_probe, + .driver = { + .name = INA3221_DRIVER_NAME, + .of_match_table = ina3221_of_match_table, + }, + .id_table = ina3221_ids, +}; +module_i2c_driver(ina3221_i2c_driver); + +MODULE_AUTHOR("Andrew F. 
Davis "); +MODULE_DESCRIPTION("Texas Instruments INA3221 HWMon Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c index 9887d3224a86..9d5f85f3384f 100644 --- a/drivers/hwmon/jc42.c +++ b/drivers/hwmon/jc42.c @@ -31,6 +31,7 @@ #include #include #include +#include /* Addresses to scan */ static const unsigned short normal_i2c[] = { @@ -104,6 +105,9 @@ static const unsigned short normal_i2c[] = { #define MCP9804_DEVID 0x0200 #define MCP9804_DEVID_MASK 0xfffc +#define MCP9808_DEVID 0x0400 +#define MCP9808_DEVID_MASK 0xfffc + #define MCP98242_DEVID 0x2000 #define MCP98242_DEVID_MASK 0xfffc @@ -160,6 +164,7 @@ static struct jc42_chips jc42_chips[] = { { IDT_MANID, TS3001_DEVID, TS3001_DEVID_MASK }, { MAX_MANID, MAX6604_DEVID, MAX6604_DEVID_MASK }, { MCP_MANID, MCP9804_DEVID, MCP9804_DEVID_MASK }, + { MCP_MANID, MCP9808_DEVID, MCP9808_DEVID_MASK }, { MCP_MANID, MCP98242_DEVID, MCP98242_DEVID_MASK }, { MCP_MANID, MCP98243_DEVID, MCP98243_DEVID_MASK }, { MCP_MANID, MCP98244_DEVID, MCP98244_DEVID_MASK }, @@ -537,11 +542,20 @@ static const struct i2c_device_id jc42_id[] = { }; MODULE_DEVICE_TABLE(i2c, jc42_id); +#ifdef CONFIG_OF +static const struct of_device_id jc42_of_ids[] = { + { .compatible = "jedec,jc-42.4-temp", }, + { } +}; +MODULE_DEVICE_TABLE(of, jc42_of_ids); +#endif + static struct i2c_driver jc42_driver = { - .class = I2C_CLASS_SPD, + .class = I2C_CLASS_SPD | I2C_CLASS_HWMON, .driver = { .name = "jc42", .pm = JC42_DEV_PM_OPS, + .of_match_table = of_match_ptr(jc42_of_ids), }, .probe = jc42_probe, .remove = jc42_remove, diff --git a/drivers/hwmon/jz4740-hwmon.c b/drivers/hwmon/jz4740-hwmon.c index df9b3447f2a8..0621ee1b3c98 100644 --- a/drivers/hwmon/jz4740-hwmon.c +++ b/drivers/hwmon/jz4740-hwmon.c @@ -29,23 +29,13 @@ struct jz4740_hwmon { void __iomem *base; - int irq; - const struct mfd_cell *cell; - struct device *hwmon; - + struct platform_device *pdev; struct completion read_completion; - struct mutex lock; }; -static ssize_t jz4740_hwmon_show_name(struct device *dev, - struct device_attribute *dev_attr, char *buf) -{ - return sprintf(buf, "jz4740\n"); -} - static irqreturn_t jz4740_hwmon_irq(int irq, void *data) { struct jz4740_hwmon *hwmon = data; @@ -58,6 +48,7 @@ static ssize_t jz4740_hwmon_read_adcin(struct device *dev, struct device_attribute *dev_attr, char *buf) { struct jz4740_hwmon *hwmon = dev_get_drvdata(dev); + struct platform_device *pdev = hwmon->pdev; struct completion *completion = &hwmon->read_completion; long t; unsigned long val; @@ -68,7 +59,7 @@ static ssize_t jz4740_hwmon_read_adcin(struct device *dev, reinit_completion(completion); enable_irq(hwmon->irq); - hwmon->cell->enable(to_platform_device(dev)); + hwmon->cell->enable(pdev); t = wait_for_completion_interruptible_timeout(completion, HZ); @@ -80,7 +71,7 @@ static ssize_t jz4740_hwmon_read_adcin(struct device *dev, ret = t ? 
t : -ETIMEDOUT; } - hwmon->cell->disable(to_platform_device(dev)); + hwmon->cell->disable(pdev); disable_irq(hwmon->irq); mutex_unlock(&hwmon->lock); @@ -88,26 +79,24 @@ static ssize_t jz4740_hwmon_read_adcin(struct device *dev, return ret; } -static DEVICE_ATTR(name, S_IRUGO, jz4740_hwmon_show_name, NULL); static DEVICE_ATTR(in0_input, S_IRUGO, jz4740_hwmon_read_adcin, NULL); -static struct attribute *jz4740_hwmon_attributes[] = { - &dev_attr_name.attr, +static struct attribute *jz4740_attrs[] = { &dev_attr_in0_input.attr, NULL }; -static const struct attribute_group jz4740_hwmon_attr_group = { - .attrs = jz4740_hwmon_attributes, -}; +ATTRIBUTE_GROUPS(jz4740); static int jz4740_hwmon_probe(struct platform_device *pdev) { int ret; + struct device *dev = &pdev->dev; struct jz4740_hwmon *hwmon; + struct device *hwmon_dev; struct resource *mem; - hwmon = devm_kzalloc(&pdev->dev, sizeof(*hwmon), GFP_KERNEL); + hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); if (!hwmon) return -ENOMEM; @@ -125,12 +114,11 @@ static int jz4740_hwmon_probe(struct platform_device *pdev) if (IS_ERR(hwmon->base)) return PTR_ERR(hwmon->base); + hwmon->pdev = pdev; init_completion(&hwmon->read_completion); mutex_init(&hwmon->lock); - platform_set_drvdata(pdev, hwmon); - - ret = devm_request_irq(&pdev->dev, hwmon->irq, jz4740_hwmon_irq, 0, + ret = devm_request_irq(dev, hwmon->irq, jz4740_hwmon_irq, 0, pdev->name, hwmon); if (ret) { dev_err(&pdev->dev, "Failed to request irq: %d\n", ret); @@ -138,38 +126,13 @@ static int jz4740_hwmon_probe(struct platform_device *pdev) } disable_irq(hwmon->irq); - ret = sysfs_create_group(&pdev->dev.kobj, &jz4740_hwmon_attr_group); - if (ret) { - dev_err(&pdev->dev, "Failed to create sysfs group: %d\n", ret); - return ret; - } - - hwmon->hwmon = hwmon_device_register(&pdev->dev); - if (IS_ERR(hwmon->hwmon)) { - ret = PTR_ERR(hwmon->hwmon); - goto err_remove_file; - } - - return 0; - -err_remove_file: - sysfs_remove_group(&pdev->dev.kobj, &jz4740_hwmon_attr_group); - return ret; -} - -static int jz4740_hwmon_remove(struct platform_device *pdev) -{ - struct jz4740_hwmon *hwmon = platform_get_drvdata(pdev); - - hwmon_device_unregister(hwmon->hwmon); - sysfs_remove_group(&pdev->dev.kobj, &jz4740_hwmon_attr_group); - - return 0; + hwmon_dev = devm_hwmon_device_register_with_groups(dev, "jz4740", hwmon, + jz4740_groups); + return PTR_ERR_OR_ZERO(hwmon_dev); } static struct platform_driver jz4740_hwmon_driver = { .probe = jz4740_hwmon_probe, - .remove = jz4740_hwmon_remove, .driver = { .name = "jz4740-hwmon", }, diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index 69166ab3151d..547a9c87c68c 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -26,8 +26,8 @@ #include #include #include -#include #include +#include #include #include "lm75.h" @@ -66,35 +66,21 @@ static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c, /* The LM75 registers */ +#define LM75_REG_TEMP 0x00 #define LM75_REG_CONF 0x01 -static const u8 LM75_REG_TEMP[3] = { - 0x00, /* input */ - 0x03, /* max */ - 0x02, /* hyst */ -}; +#define LM75_REG_HYST 0x02 +#define LM75_REG_MAX 0x03 /* Each client has this additional data */ struct lm75_data { struct i2c_client *client; - struct device *hwmon_dev; - struct mutex update_lock; + struct regmap *regmap; u8 orig_conf; u8 resolution; /* In bits, between 9 and 12 */ u8 resolution_limits; - char valid; /* !=0 if registers are valid */ - unsigned long last_updated; /* In jiffies */ - unsigned long sample_time; /* In jiffies */ - s16 temp[3]; /* 
Register values, - 0 = input - 1 = max - 2 = hyst */ + unsigned int sample_time; /* In ms */ }; -static int lm75_read_value(struct i2c_client *client, u8 reg); -static int lm75_write_value(struct i2c_client *client, u8 reg, u16 value); -static struct lm75_data *lm75_update_device(struct device *dev); - - /*-----------------------------------------------------------------------*/ static inline long lm75_reg_to_mc(s16 temp, u8 resolution) @@ -106,12 +92,15 @@ static inline long lm75_reg_to_mc(s16 temp, u8 resolution) static int lm75_read_temp(void *dev, int *temp) { - struct lm75_data *data = lm75_update_device(dev); + struct lm75_data *data = dev_get_drvdata(dev); + unsigned int _temp; + int err; - if (IS_ERR(data)) - return PTR_ERR(data); + err = regmap_read(data->regmap, LM75_REG_TEMP, &_temp); + if (err < 0) + return err; - *temp = lm75_reg_to_mc(data->temp[0], data->resolution); + *temp = lm75_reg_to_mc(_temp, data->resolution); return 0; } @@ -120,13 +109,15 @@ static ssize_t show_temp(struct device *dev, struct device_attribute *da, char *buf) { struct sensor_device_attribute *attr = to_sensor_dev_attr(da); - struct lm75_data *data = lm75_update_device(dev); + struct lm75_data *data = dev_get_drvdata(dev); + unsigned int temp = 0; + int err; - if (IS_ERR(data)) - return PTR_ERR(data); + err = regmap_read(data->regmap, attr->index, &temp); + if (err < 0) + return err; - return sprintf(buf, "%ld\n", lm75_reg_to_mc(data->temp[attr->index], - data->resolution)); + return sprintf(buf, "%ld\n", lm75_reg_to_mc(temp, data->resolution)); } static ssize_t set_temp(struct device *dev, struct device_attribute *da, @@ -134,8 +125,6 @@ static ssize_t set_temp(struct device *dev, struct device_attribute *da, { struct sensor_device_attribute *attr = to_sensor_dev_attr(da); struct lm75_data *data = dev_get_drvdata(dev); - struct i2c_client *client = data->client; - int nr = attr->index; long temp; int error; u8 resolution; @@ -153,25 +142,36 @@ static ssize_t set_temp(struct device *dev, struct device_attribute *da, else resolution = data->resolution; - mutex_lock(&data->update_lock); temp = clamp_val(temp, LM75_TEMP_MIN, LM75_TEMP_MAX); - data->temp[nr] = DIV_ROUND_CLOSEST(temp << (resolution - 8), - 1000) << (16 - resolution); - lm75_write_value(client, LM75_REG_TEMP[nr], data->temp[nr]); - mutex_unlock(&data->update_lock); + temp = DIV_ROUND_CLOSEST(temp << (resolution - 8), + 1000) << (16 - resolution); + error = regmap_write(data->regmap, attr->index, temp); + if (error < 0) + return error; + return count; } +static ssize_t show_update_interval(struct device *dev, + struct device_attribute *da, char *buf) +{ + struct lm75_data *data = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", data->sample_time); +} + static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, - show_temp, set_temp, 1); + show_temp, set_temp, LM75_REG_MAX); static SENSOR_DEVICE_ATTR(temp1_max_hyst, S_IWUSR | S_IRUGO, - show_temp, set_temp, 2); -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL, 0); + show_temp, set_temp, LM75_REG_HYST); +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL, LM75_REG_TEMP); +static DEVICE_ATTR(update_interval, S_IRUGO, show_update_interval, NULL); static struct attribute *lm75_attrs[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_temp1_max.dev_attr.attr, &sensor_dev_attr_temp1_max_hyst.dev_attr.attr, + &dev_attr_update_interval.attr, NULL }; @@ -185,10 +185,40 @@ static const struct thermal_zone_of_device_ops lm75_of_thermal_ops = { /* device 
probe and removal */ +static bool lm75_is_writeable_reg(struct device *dev, unsigned int reg) +{ + return reg != LM75_REG_TEMP; +} + +static bool lm75_is_volatile_reg(struct device *dev, unsigned int reg) +{ + return reg == LM75_REG_TEMP; +} + +static const struct regmap_config lm75_regmap_config = { + .reg_bits = 8, + .val_bits = 16, + .max_register = LM75_REG_MAX, + .writeable_reg = lm75_is_writeable_reg, + .volatile_reg = lm75_is_volatile_reg, + .val_format_endian = REGMAP_ENDIAN_BIG, + .cache_type = REGCACHE_RBTREE, + .use_single_rw = true, +}; + +static void lm75_remove(void *data) +{ + struct lm75_data *lm75 = data; + struct i2c_client *client = lm75->client; + + i2c_smbus_write_byte_data(client, LM75_REG_CONF, lm75->orig_conf); +} + static int lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct device *dev = &client->dev; + struct device *hwmon_dev; struct lm75_data *data; int status; u8 set_mask, clr_mask; @@ -204,8 +234,10 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) return -ENOMEM; data->client = client; - i2c_set_clientdata(client, data); - mutex_init(&data->update_lock); + + data->regmap = devm_regmap_init_i2c(client, &lm75_regmap_config); + if (IS_ERR(data->regmap)) + return PTR_ERR(data->regmap); /* Set to LM75 resolution (9 bits, 1/2 degree C) and range. * Then tweak to be more precise when appropriate. @@ -217,7 +249,7 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) case adt75: clr_mask |= 1 << 5; /* not one-shot mode */ data->resolution = 12; - data->sample_time = HZ / 8; + data->sample_time = MSEC_PER_SEC / 8; break; case ds1775: case ds75: @@ -225,35 +257,35 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) clr_mask |= 3 << 5; set_mask |= 2 << 5; /* 11-bit mode */ data->resolution = 11; - data->sample_time = HZ; + data->sample_time = MSEC_PER_SEC; break; case ds7505: set_mask |= 3 << 5; /* 12-bit mode */ data->resolution = 12; - data->sample_time = HZ / 4; + data->sample_time = MSEC_PER_SEC / 4; break; case g751: case lm75: case lm75a: data->resolution = 9; - data->sample_time = HZ / 2; + data->sample_time = MSEC_PER_SEC / 2; break; case lm75b: data->resolution = 11; - data->sample_time = HZ / 4; + data->sample_time = MSEC_PER_SEC / 4; break; case max6625: data->resolution = 9; - data->sample_time = HZ / 4; + data->sample_time = MSEC_PER_SEC / 4; break; case max6626: data->resolution = 12; data->resolution_limits = 9; - data->sample_time = HZ / 4; + data->sample_time = MSEC_PER_SEC / 4; break; case tcn75: data->resolution = 9; - data->sample_time = HZ / 8; + data->sample_time = MSEC_PER_SEC / 8; break; case mcp980x: data->resolution_limits = 9; @@ -262,14 +294,14 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) case tmp101: set_mask |= 3 << 5; /* 12-bit mode */ data->resolution = 12; - data->sample_time = HZ; + data->sample_time = MSEC_PER_SEC; clr_mask |= 1 << 7; /* not one-shot mode */ break; case tmp112: set_mask |= 3 << 5; /* 12-bit mode */ clr_mask |= 1 << 7; /* not one-shot mode */ data->resolution = 12; - data->sample_time = HZ / 4; + data->sample_time = MSEC_PER_SEC / 4; break; case tmp105: case tmp175: @@ -278,17 +310,17 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) set_mask |= 3 << 5; /* 12-bit mode */ clr_mask |= 1 << 7; /* not one-shot mode */ data->resolution = 12; - data->sample_time = HZ / 2; + data->sample_time = MSEC_PER_SEC / 2; break; case tmp75c: clr_mask |= 1 << 5; /* not one-shot mode */ 
data->resolution = 12; - data->sample_time = HZ / 4; + data->sample_time = MSEC_PER_SEC / 4; break; } /* configure as specified */ - status = lm75_read_value(client, LM75_REG_CONF); + status = i2c_smbus_read_byte_data(client, LM75_REG_CONF); if (status < 0) { dev_dbg(dev, "Can't read config? %d\n", status); return status; @@ -297,30 +329,23 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) new = status & ~clr_mask; new |= set_mask; if (status != new) - lm75_write_value(client, LM75_REG_CONF, new); - dev_dbg(dev, "Config %02x\n", new); + i2c_smbus_write_byte_data(client, LM75_REG_CONF, new); - data->hwmon_dev = hwmon_device_register_with_groups(dev, client->name, - data, lm75_groups); - if (IS_ERR(data->hwmon_dev)) - return PTR_ERR(data->hwmon_dev); + devm_add_action(dev, lm75_remove, data); - devm_thermal_zone_of_sensor_register(data->hwmon_dev, 0, - data->hwmon_dev, - &lm75_of_thermal_ops); + dev_dbg(dev, "Config %02x\n", new); - dev_info(dev, "%s: sensor '%s'\n", - dev_name(data->hwmon_dev), client->name); + hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, + data, lm75_groups); + if (IS_ERR(hwmon_dev)) + return PTR_ERR(hwmon_dev); - return 0; -} + devm_thermal_zone_of_sensor_register(hwmon_dev, 0, + hwmon_dev, + &lm75_of_thermal_ops); -static int lm75_remove(struct i2c_client *client) -{ - struct lm75_data *data = i2c_get_clientdata(client); + dev_info(dev, "%s: sensor '%s'\n", dev_name(hwmon_dev), client->name); - hwmon_device_unregister(data->hwmon_dev); - lm75_write_value(client, LM75_REG_CONF, data->orig_conf); return 0; } @@ -449,13 +474,13 @@ static int lm75_suspend(struct device *dev) { int status; struct i2c_client *client = to_i2c_client(dev); - status = lm75_read_value(client, LM75_REG_CONF); + status = i2c_smbus_read_byte_data(client, LM75_REG_CONF); if (status < 0) { dev_dbg(&client->dev, "Can't read config? %d\n", status); return status; } status = status | LM75_SHUTDOWN; - lm75_write_value(client, LM75_REG_CONF, status); + i2c_smbus_write_byte_data(client, LM75_REG_CONF, status); return 0; } @@ -463,13 +488,13 @@ static int lm75_resume(struct device *dev) { int status; struct i2c_client *client = to_i2c_client(dev); - status = lm75_read_value(client, LM75_REG_CONF); + status = i2c_smbus_read_byte_data(client, LM75_REG_CONF); if (status < 0) { dev_dbg(&client->dev, "Can't read config? %d\n", status); return status; } status = status & ~LM75_SHUTDOWN; - lm75_write_value(client, LM75_REG_CONF, status); + i2c_smbus_write_byte_data(client, LM75_REG_CONF, status); return 0; } @@ -489,73 +514,11 @@ static struct i2c_driver lm75_driver = { .pm = LM75_DEV_PM_OPS, }, .probe = lm75_probe, - .remove = lm75_remove, .id_table = lm75_ids, .detect = lm75_detect, .address_list = normal_i2c, }; -/*-----------------------------------------------------------------------*/ - -/* register access */ - -/* - * All registers are word-sized, except for the configuration register. - * LM75 uses a high-byte first convention, which is exactly opposite to - * the SMBus standard. 
- */ -static int lm75_read_value(struct i2c_client *client, u8 reg) -{ - if (reg == LM75_REG_CONF) - return i2c_smbus_read_byte_data(client, reg); - else - return i2c_smbus_read_word_swapped(client, reg); -} - -static int lm75_write_value(struct i2c_client *client, u8 reg, u16 value) -{ - if (reg == LM75_REG_CONF) - return i2c_smbus_write_byte_data(client, reg, value); - else - return i2c_smbus_write_word_swapped(client, reg, value); -} - -static struct lm75_data *lm75_update_device(struct device *dev) -{ - struct lm75_data *data = dev_get_drvdata(dev); - struct i2c_client *client = data->client; - struct lm75_data *ret = data; - - mutex_lock(&data->update_lock); - - if (time_after(jiffies, data->last_updated + data->sample_time) - || !data->valid) { - int i; - dev_dbg(&client->dev, "Starting lm75 update\n"); - - for (i = 0; i < ARRAY_SIZE(data->temp); i++) { - int status; - - status = lm75_read_value(client, LM75_REG_TEMP[i]); - if (unlikely(status < 0)) { - dev_dbg(dev, - "LM75: Failed to read value: reg %d, error %d\n", - LM75_REG_TEMP[i], status); - ret = ERR_PTR(status); - data->valid = 0; - goto abort; - } - data->temp[i] = status; - } - data->last_updated = jiffies; - data->valid = 1; - } - -abort: - mutex_unlock(&data->update_lock); - return ret; -} - module_i2c_driver(lm75_driver); MODULE_AUTHOR("Frodo Looijaard "); diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index e30a5939dc0d..f51e758ba529 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -171,7 +171,6 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define SA56004_REG_R_LOCAL_TEMPL 0x22 -#define LM90_DEF_CONVRATE_RVAL 6 /* Def conversion rate register value */ #define LM90_MAX_CONVRATE_MS 16000 /* Maximum conversion rate in ms */ /* TMP451 registers */ @@ -366,11 +365,9 @@ enum lm90_temp11_reg_index { struct lm90_data { struct i2c_client *client; - struct device *hwmon_dev; const struct attribute_group *groups[6]; struct mutex update_lock; - struct regulator *regulator; - char valid; /* zero until following fields are valid */ + bool valid; /* true if register values are valid */ unsigned long last_updated; /* in jiffies */ int kind; u32 flags; @@ -412,7 +409,7 @@ static inline s32 adm1032_write_byte(struct i2c_client *client, u8 value) * because we don't want the address pointer to change between the write * byte and the read byte transactions. */ -static int lm90_read_reg(struct i2c_client *client, u8 reg, u8 *value) +static int lm90_read_reg(struct i2c_client *client, u8 reg) { int err; @@ -423,20 +420,12 @@ static int lm90_read_reg(struct i2c_client *client, u8 reg, u8 *value) } else err = i2c_smbus_read_byte_data(client, reg); - if (err < 0) { - dev_warn(&client->dev, "Register %#02x read failed (%d)\n", - reg, err); - return err; - } - *value = err; - - return 0; + return err; } -static int lm90_read16(struct i2c_client *client, u8 regh, u8 regl, u16 *value) +static int lm90_read16(struct i2c_client *client, u8 regh, u8 regl) { - int err; - u8 oldh, newh, l; + int oldh, newh, l; /* * There is a trick here. We have to read two registers to have the @@ -451,18 +440,21 @@ static int lm90_read16(struct i2c_client *client, u8 regh, u8 regl, u16 *value) * we have to read the low byte again, and now we believe we have a * correct reading. 
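 * For example, suppose the value rises from 0x10FF to 0x1100 between
 * the reads: oldh = 0x10, l = 0xFF, newh = 0x11. Combining newh with
 * the stale low byte would yield 0x11FF, off by almost a degree, so
 * the low byte is read again (now 0x00) and 0x1100 is returned.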
 */
-	if ((err = lm90_read_reg(client, regh, &oldh))
-	 || (err = lm90_read_reg(client, regl, &l))
-	 || (err = lm90_read_reg(client, regh, &newh)))
-		return err;
+	oldh = lm90_read_reg(client, regh);
+	if (oldh < 0)
+		return oldh;
+	l = lm90_read_reg(client, regl);
+	if (l < 0)
+		return l;
+	newh = lm90_read_reg(client, regh);
+	if (newh < 0)
+		return newh;
 	if (oldh != newh) {
-		err = lm90_read_reg(client, regl, &l);
-		if (err)
-			return err;
+		l = lm90_read_reg(client, regl);
+		if (l < 0)
+			return l;
 	}
-	*value = (newh << 8) | l;
-
-	return 0;
+	return (newh << 8) | l;
 }
 
 /*
@@ -473,20 +465,23 @@
  * various registers have different meanings as a result of selecting a
  * non-default remote channel.
  */
-static inline void lm90_select_remote_channel(struct i2c_client *client,
-					      struct lm90_data *data,
-					      int channel)
+static inline int lm90_select_remote_channel(struct i2c_client *client,
+					     struct lm90_data *data,
+					     int channel)
 {
-	u8 config;
+	int config;
 
 	if (data->kind == max6696) {
-		lm90_read_reg(client, LM90_REG_R_CONFIG1, &config);
+		config = lm90_read_reg(client, LM90_REG_R_CONFIG1);
+		if (config < 0)
+			return config;
 		config &= ~0x08;
 		if (channel)
 			config |= 0x08;
 		i2c_smbus_write_byte_data(client, LM90_REG_W_CONFIG1, config);
 	}
+	return 0;
 }
 
 /*
@@ -513,118 +508,204 @@ static void lm90_set_convrate(struct i2c_client *client, struct lm90_data *data,
 	data->update_interval = DIV_ROUND_CLOSEST(update_interval, 64);
 }
 
+static int lm90_update_limits(struct device *dev)
+{
+	struct lm90_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
+	int val;
+
+	val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT);
+	if (val < 0)
+		return val;
+	data->temp8[LOCAL_CRIT] = val;
+
+	val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT);
+	if (val < 0)
+		return val;
+	data->temp8[REMOTE_CRIT] = val;
+
+	val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST);
+	if (val < 0)
+		return val;
+	data->temp_hyst = val;
+
+	val = lm90_read_reg(client, LM90_REG_R_REMOTE_LOWH);
+	if (val < 0)
+		return val;
+	data->temp11[REMOTE_LOW] = val << 8;
+
+	if (data->flags & LM90_HAVE_REM_LIMIT_EXT) {
+		val = lm90_read_reg(client, LM90_REG_R_REMOTE_LOWL);
+		if (val < 0)
+			return val;
+		data->temp11[REMOTE_LOW] |= val;
+	}
+
+	val = lm90_read_reg(client, LM90_REG_R_REMOTE_HIGHH);
+	if (val < 0)
+		return val;
+	data->temp11[REMOTE_HIGH] = val << 8;
+
+	if (data->flags & LM90_HAVE_REM_LIMIT_EXT) {
+		val = lm90_read_reg(client, LM90_REG_R_REMOTE_HIGHL);
+		if (val < 0)
+			return val;
+		data->temp11[REMOTE_HIGH] |= val;
+	}
+
+	if (data->flags & LM90_HAVE_OFFSET) {
+		val = lm90_read16(client, LM90_REG_R_REMOTE_OFFSH,
+				  LM90_REG_R_REMOTE_OFFSL);
+		if (val < 0)
+			return val;
+		data->temp11[REMOTE_OFFSET] = val;
+	}
+
+	if (data->flags & LM90_HAVE_EMERGENCY) {
+		val = lm90_read_reg(client, MAX6659_REG_R_LOCAL_EMERG);
+		if (val < 0)
+			return val;
+		data->temp8[LOCAL_EMERG] = val;
+
+		val = lm90_read_reg(client, MAX6659_REG_R_REMOTE_EMERG);
+		if (val < 0)
+			return val;
+		data->temp8[REMOTE_EMERG] = val;
+	}
+
+	if (data->kind == max6696) {
+		val = lm90_select_remote_channel(client, data, 1);
+		if (val < 0)
+			return val;
+
+		val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT);
+		if (val < 0)
+			return val;
+		data->temp8[REMOTE2_CRIT] = val;
+
+		val = lm90_read_reg(client, MAX6659_REG_R_REMOTE_EMERG);
+		if (val < 0)
+			return val;
+		data->temp8[REMOTE2_EMERG] = val;
+
+		val = lm90_read_reg(client, LM90_REG_R_REMOTE_LOWH);
+		if (val < 0)
+			return val;
+
data->temp11[REMOTE2_LOW] = val << 8; + + val = lm90_read_reg(client, LM90_REG_R_REMOTE_HIGHH); + if (val < 0) + return val; + data->temp11[REMOTE2_HIGH] = val << 8; + + lm90_select_remote_channel(client, data, 0); + } + + return 0; +} + static struct lm90_data *lm90_update_device(struct device *dev) { struct lm90_data *data = dev_get_drvdata(dev); struct i2c_client *client = data->client; unsigned long next_update; + int val = 0; mutex_lock(&data->update_lock); + if (!data->valid) { + val = lm90_update_limits(dev); + if (val < 0) + goto error; + } + next_update = data->last_updated + msecs_to_jiffies(data->update_interval); if (time_after(jiffies, next_update) || !data->valid) { - u8 h, l; - u8 alarms; - dev_dbg(&client->dev, "Updating lm90 data.\n"); - lm90_read_reg(client, LM90_REG_R_LOCAL_LOW, - &data->temp8[LOCAL_LOW]); - lm90_read_reg(client, LM90_REG_R_LOCAL_HIGH, - &data->temp8[LOCAL_HIGH]); - lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT, - &data->temp8[LOCAL_CRIT]); - lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT, - &data->temp8[REMOTE_CRIT]); - lm90_read_reg(client, LM90_REG_R_TCRIT_HYST, &data->temp_hyst); + + data->valid = false; + + val = lm90_read_reg(client, LM90_REG_R_LOCAL_LOW); + if (val < 0) + goto error; + data->temp8[LOCAL_LOW] = val; + + val = lm90_read_reg(client, LM90_REG_R_LOCAL_HIGH); + if (val < 0) + goto error; + data->temp8[LOCAL_HIGH] = val; if (data->reg_local_ext) { - lm90_read16(client, LM90_REG_R_LOCAL_TEMP, - data->reg_local_ext, - &data->temp11[LOCAL_TEMP]); + val = lm90_read16(client, LM90_REG_R_LOCAL_TEMP, + data->reg_local_ext); + if (val < 0) + goto error; + data->temp11[LOCAL_TEMP] = val; } else { - if (lm90_read_reg(client, LM90_REG_R_LOCAL_TEMP, - &h) == 0) - data->temp11[LOCAL_TEMP] = h << 8; - } - lm90_read16(client, LM90_REG_R_REMOTE_TEMPH, - LM90_REG_R_REMOTE_TEMPL, - &data->temp11[REMOTE_TEMP]); - - if (lm90_read_reg(client, LM90_REG_R_REMOTE_LOWH, &h) == 0) { - data->temp11[REMOTE_LOW] = h << 8; - if ((data->flags & LM90_HAVE_REM_LIMIT_EXT) - && lm90_read_reg(client, LM90_REG_R_REMOTE_LOWL, - &l) == 0) - data->temp11[REMOTE_LOW] |= l; - } - if (lm90_read_reg(client, LM90_REG_R_REMOTE_HIGHH, &h) == 0) { - data->temp11[REMOTE_HIGH] = h << 8; - if ((data->flags & LM90_HAVE_REM_LIMIT_EXT) - && lm90_read_reg(client, LM90_REG_R_REMOTE_HIGHL, - &l) == 0) - data->temp11[REMOTE_HIGH] |= l; + val = lm90_read_reg(client, LM90_REG_R_LOCAL_TEMP); + if (val < 0) + goto error; + data->temp11[LOCAL_TEMP] = val << 8; } + val = lm90_read16(client, LM90_REG_R_REMOTE_TEMPH, + LM90_REG_R_REMOTE_TEMPL); + if (val < 0) + goto error; + data->temp11[REMOTE_TEMP] = val; - if (data->flags & LM90_HAVE_OFFSET) { - if (lm90_read_reg(client, LM90_REG_R_REMOTE_OFFSH, - &h) == 0 - && lm90_read_reg(client, LM90_REG_R_REMOTE_OFFSL, - &l) == 0) - data->temp11[REMOTE_OFFSET] = (h << 8) | l; - } - if (data->flags & LM90_HAVE_EMERGENCY) { - lm90_read_reg(client, MAX6659_REG_R_LOCAL_EMERG, - &data->temp8[LOCAL_EMERG]); - lm90_read_reg(client, MAX6659_REG_R_REMOTE_EMERG, - &data->temp8[REMOTE_EMERG]); - } - lm90_read_reg(client, LM90_REG_R_STATUS, &alarms); - data->alarms = alarms; /* save as 16 bit value */ + val = lm90_read_reg(client, LM90_REG_R_STATUS); + if (val < 0) + goto error; + data->alarms = val; /* lower 8 bit of alarms */ if (data->kind == max6696) { - lm90_select_remote_channel(client, data, 1); - lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT, - &data->temp8[REMOTE2_CRIT]); - lm90_read_reg(client, MAX6659_REG_R_REMOTE_EMERG, - &data->temp8[REMOTE2_EMERG]); - 
lm90_read16(client, LM90_REG_R_REMOTE_TEMPH, - LM90_REG_R_REMOTE_TEMPL, - &data->temp11[REMOTE2_TEMP]); - if (!lm90_read_reg(client, LM90_REG_R_REMOTE_LOWH, &h)) - data->temp11[REMOTE2_LOW] = h << 8; - if (!lm90_read_reg(client, LM90_REG_R_REMOTE_HIGHH, &h)) - data->temp11[REMOTE2_HIGH] = h << 8; + val = lm90_select_remote_channel(client, data, 1); + if (val < 0) + goto error; + + val = lm90_read16(client, LM90_REG_R_REMOTE_TEMPH, + LM90_REG_R_REMOTE_TEMPL); + if (val < 0) + goto error; + data->temp11[REMOTE2_TEMP] = val; + lm90_select_remote_channel(client, data, 0); - if (!lm90_read_reg(client, MAX6696_REG_R_STATUS2, - &alarms)) - data->alarms |= alarms << 8; + val = lm90_read_reg(client, MAX6696_REG_R_STATUS2); + if (val < 0) + goto error; + data->alarms |= val << 8; } /* * Re-enable ALERT# output if it was originally enabled and * relevant alarms are all clear */ - if ((data->config_orig & 0x80) == 0 - && (data->alarms & data->alert_alarms) == 0) { - u8 config; + if (!(data->config_orig & 0x80) && + !(data->alarms & data->alert_alarms)) { + val = lm90_read_reg(client, LM90_REG_R_CONFIG1); + if (val < 0) + goto error; - lm90_read_reg(client, LM90_REG_R_CONFIG1, &config); - if (config & 0x80) { + if (val & 0x80) { dev_dbg(&client->dev, "Re-enabling ALERT#\n"); i2c_smbus_write_byte_data(client, LM90_REG_W_CONFIG1, - config & ~0x80); + val & ~0x80); } } data->last_updated = jiffies; - data->valid = 1; + data->valid = true; } +error: mutex_unlock(&data->update_lock); + if (val < 0) + return ERR_PTR(val); + return data; } @@ -709,16 +790,14 @@ static inline int temp_from_u8_adt7461(struct lm90_data *data, u8 val) { if (data->flags & LM90_FLAG_ADT7461_EXT) return (val - 64) * 1000; - else - return temp_from_s8(val); + return temp_from_s8(val); } static inline int temp_from_u16_adt7461(struct lm90_data *data, u16 val) { if (data->flags & LM90_FLAG_ADT7461_EXT) return (val - 0x4000) / 64 * 250; - else - return temp_from_s16(val); + return temp_from_s16(val); } static u8 temp_to_u8_adt7461(struct lm90_data *data, long val) @@ -729,13 +808,12 @@ static u8 temp_to_u8_adt7461(struct lm90_data *data, long val) if (val >= 191000) return 0xFF; return (val + 500 + 64000) / 1000; - } else { - if (val <= 0) - return 0; - if (val >= 127000) - return 127; - return (val + 500) / 1000; } + if (val <= 0) + return 0; + if (val >= 127000) + return 127; + return (val + 500) / 1000; } static u16 temp_to_u16_adt7461(struct lm90_data *data, long val) @@ -746,13 +824,12 @@ static u16 temp_to_u16_adt7461(struct lm90_data *data, long val) if (val >= 191750) return 0xFFC0; return (val + 64000 + 125) / 250 * 64; - } else { - if (val <= 0) - return 0; - if (val >= 127750) - return 0x7FC0; - return (val + 125) / 250 * 64; } + if (val <= 0) + return 0; + if (val >= 127750) + return 0x7FC0; + return (val + 125) / 250 * 64; } /* @@ -766,6 +843,9 @@ static ssize_t show_temp8(struct device *dev, struct device_attribute *devattr, struct lm90_data *data = lm90_update_device(dev); int temp; + if (IS_ERR(data)) + return PTR_ERR(data); + if (data->kind == adt7461 || data->kind == tmp451) temp = temp_from_u8_adt7461(data, data->temp8[attr->index]); else if (data->kind == max6646) @@ -832,6 +912,9 @@ static ssize_t show_temp11(struct device *dev, struct device_attribute *devattr, struct lm90_data *data = lm90_update_device(dev); int temp; + if (IS_ERR(data)) + return PTR_ERR(data); + if (data->kind == adt7461 || data->kind == tmp451) temp = temp_from_u16_adt7461(data, data->temp11[attr->index]); else if (data->kind == max6646) @@ 
-907,6 +990,9 @@ static ssize_t show_temphyst(struct device *dev, struct lm90_data *data = lm90_update_device(dev); int temp; + if (IS_ERR(data)) + return PTR_ERR(data); + if (data->kind == adt7461 || data->kind == tmp451) temp = temp_from_u8_adt7461(data, data->temp8[attr->index]); else if (data->kind == max6646) @@ -953,6 +1039,10 @@ static ssize_t show_alarms(struct device *dev, struct device_attribute *dummy, char *buf) { struct lm90_data *data = lm90_update_device(dev); + + if (IS_ERR(data)) + return PTR_ERR(data); + return sprintf(buf, "%d\n", data->alarms); } @@ -963,6 +1053,9 @@ static ssize_t show_alarm(struct device *dev, struct device_attribute struct lm90_data *data = lm90_update_device(dev); int bitnr = attr->index; + if (IS_ERR(data)) + return PTR_ERR(data); + return sprintf(buf, "%d\n", (data->alarms >> bitnr) & 1); } @@ -1404,8 +1497,11 @@ static int lm90_detect(struct i2c_client *client, return 0; } -static void lm90_restore_conf(struct i2c_client *client, struct lm90_data *data) +static void lm90_restore_conf(void *_data) { + struct lm90_data *data = _data; + struct i2c_client *client = data->client; + /* Restore initial configuration */ i2c_smbus_write_byte_data(client, LM90_REG_W_CONVRATE, data->convrate_orig); @@ -1413,24 +1509,22 @@ static void lm90_restore_conf(struct i2c_client *client, struct lm90_data *data) data->config_orig); } -static void lm90_init_client(struct i2c_client *client, struct lm90_data *data) +static int lm90_init_client(struct i2c_client *client, struct lm90_data *data) { - u8 config, convrate; + int config, convrate; - if (lm90_read_reg(client, LM90_REG_R_CONVRATE, &convrate) < 0) { - dev_warn(&client->dev, "Failed to read convrate register!\n"); - convrate = LM90_DEF_CONVRATE_RVAL; - } + convrate = lm90_read_reg(client, LM90_REG_R_CONVRATE); + if (convrate < 0) + return convrate; data->convrate_orig = convrate; /* * Start the conversions. 
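The reworked update path reports failures by encoding an errno into the returned pointer instead of silently keeping stale readings: lm90_update_device() returns ERR_PTR(val) on a failed register read, and every sysfs show callback bails out with PTR_ERR() before touching the cache. A minimal sketch of that idiom using the kernel's <linux/err.h> helpers; read_cached_data() and the alarms field are hypothetical stand-ins for the real driver:

#include <linux/device.h>
#include <linux/err.h>

struct example_data {
	int alarms;
};

/* Hypothetical update helper: valid pointer on success, encoded errno on failure. */
static struct example_data *read_cached_data(struct device *dev)
{
	int ret = 0;		/* result of the last bus transfer */

	/* ... refresh the cache from the hardware, setting ret ... */
	if (ret < 0)
		return ERR_PTR(ret);	/* the pointer itself carries -EIO etc. */

	return dev_get_drvdata(dev);
}

static ssize_t show_alarms_example(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct example_data *data = read_cached_data(dev);

	if (IS_ERR(data))
		return PTR_ERR(data);	/* userspace sees the errno from read() */

	return sprintf(buf, "%d\n", data->alarms);
}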
*/ lm90_set_convrate(client, data, 500); /* 500ms; 2Hz conversion rate */ - if (lm90_read_reg(client, LM90_REG_R_CONFIG1, &config) < 0) { - dev_warn(&client->dev, "Initialization failed!\n"); - return; - } + config = lm90_read_reg(client, LM90_REG_R_CONFIG1); + if (config < 0) + return config; data->config_orig = config; /* Check Temperature Range Select */ @@ -1456,17 +1550,26 @@ static void lm90_init_client(struct i2c_client *client, struct lm90_data *data) config &= 0xBF; /* run */ if (config != data->config_orig) /* Only write if changed */ i2c_smbus_write_byte_data(client, LM90_REG_W_CONFIG1, config); + + devm_add_action(&client->dev, lm90_restore_conf, data); + + return 0; } static bool lm90_is_tripped(struct i2c_client *client, u16 *status) { struct lm90_data *data = i2c_get_clientdata(client); - u8 st, st2 = 0; + int st, st2 = 0; - lm90_read_reg(client, LM90_REG_R_STATUS, &st); + st = lm90_read_reg(client, LM90_REG_R_STATUS); + if (st < 0) + return false; - if (data->kind == max6696) - lm90_read_reg(client, MAX6696_REG_R_STATUS2, &st2); + if (data->kind == max6696) { + st2 = lm90_read_reg(client, MAX6696_REG_R_STATUS2); + if (st2 < 0) + return false; + } *status = st | (st2 << 8); @@ -1506,6 +1609,16 @@ static irqreturn_t lm90_irq_thread(int irq, void *dev_id) return IRQ_NONE; } +static void lm90_remove_pec(void *dev) +{ + device_remove_file(dev, &dev_attr_pec); +} + +static void lm90_regulator_disable(void *regulator) +{ + regulator_disable(regulator); +} + static int lm90_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -1513,6 +1626,7 @@ static int lm90_probe(struct i2c_client *client, struct i2c_adapter *adapter = to_i2c_adapter(dev->parent); struct lm90_data *data; struct regulator *regulator; + struct device *hwmon_dev; int groups = 0; int err; @@ -1526,6 +1640,8 @@ static int lm90_probe(struct i2c_client *client, return err; } + devm_add_action(dev, lm90_regulator_disable, regulator); + data = devm_kzalloc(dev, sizeof(struct lm90_data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -1534,8 +1650,6 @@ static int lm90_probe(struct i2c_client *client, i2c_set_clientdata(client, data); mutex_init(&data->update_lock); - data->regulator = regulator; - /* Set the device type */ data->kind = id->driver_data; if (data->kind == adm1032) { @@ -1557,7 +1671,11 @@ static int lm90_probe(struct i2c_client *client, data->max_convrate = lm90_params[data->kind].max_convrate; /* Initialize the LM90 chip */ - lm90_init_client(client, data); + err = lm90_init_client(client, data); + if (err < 0) { + dev_err(dev, "Failed to initialize device\n"); + return err; + } /* Register sysfs hooks */ data->groups[groups++] = &lm90_group; @@ -1577,15 +1695,14 @@ static int lm90_probe(struct i2c_client *client, if (client->flags & I2C_CLIENT_PEC) { err = device_create_file(dev, &dev_attr_pec); if (err) - goto exit_restore; + return err; + devm_add_action(dev, lm90_remove_pec, dev); } - data->hwmon_dev = hwmon_device_register_with_groups(dev, client->name, - data, data->groups); - if (IS_ERR(data->hwmon_dev)) { - err = PTR_ERR(data->hwmon_dev); - goto exit_remove_pec; - } + hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, + data, data->groups); + if (IS_ERR(hwmon_dev)) + return PTR_ERR(hwmon_dev); if (client->irq) { dev_dbg(dev, "IRQ: %d\n", client->irq); @@ -1595,32 +1712,10 @@ static int lm90_probe(struct i2c_client *client, "lm90", client); if (err < 0) { dev_err(dev, "cannot request IRQ %d\n", client->irq); - goto exit_unregister; + return err; } } - return 0; - 
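Because lm90_init_client(), the PEC attribute, and the regulator now register their own undo callbacks with devm_add_action(), the explicit unwinding labels below and the whole of lm90_remove() can go away; the devm core runs the actions in reverse order on unbind or on a later probe failure. The shape of the pattern, reduced to one resource (hypothetical example_* names; devm_add_action() can itself fail, which is why a variant like devm_add_action_or_reset() is often preferable):

#include <linux/device.h>
#include <linux/regulator/consumer.h>

/* Runs automatically on driver unbind or on any later probe error. */
static void example_regulator_disable(void *arg)
{
	regulator_disable(arg);
}

static int example_probe_step(struct device *dev, struct regulator *vcc)
{
	int err;

	err = regulator_enable(vcc);
	if (err < 0)
		return err;

	/* On registration failure this disables vcc and returns the error. */
	return devm_add_action_or_reset(dev, example_regulator_disable, vcc);
}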
-exit_unregister: - hwmon_device_unregister(data->hwmon_dev); -exit_remove_pec: - device_remove_file(dev, &dev_attr_pec); -exit_restore: - lm90_restore_conf(client, data); - regulator_disable(data->regulator); - - return err; -} - -static int lm90_remove(struct i2c_client *client) -{ - struct lm90_data *data = i2c_get_clientdata(client); - - hwmon_device_unregister(data->hwmon_dev); - device_remove_file(&client->dev, &dev_attr_pec); - lm90_restore_conf(client, data); - regulator_disable(data->regulator); - return 0; } @@ -1636,13 +1731,16 @@ static void lm90_alert(struct i2c_client *client, unsigned int flag) */ struct lm90_data *data = i2c_get_clientdata(client); - if ((data->flags & LM90_HAVE_BROKEN_ALERT) - && (alarms & data->alert_alarms)) { - u8 config; + if ((data->flags & LM90_HAVE_BROKEN_ALERT) && + (alarms & data->alert_alarms)) { + int config; + dev_dbg(&client->dev, "Disabling ALERT#\n"); - lm90_read_reg(client, LM90_REG_R_CONFIG1, &config); - i2c_smbus_write_byte_data(client, LM90_REG_W_CONFIG1, - config | 0x80); + config = lm90_read_reg(client, LM90_REG_R_CONFIG1); + if (config >= 0) + i2c_smbus_write_byte_data(client, + LM90_REG_W_CONFIG1, + config | 0x80); } } else { dev_info(&client->dev, "Everything OK\n"); @@ -1655,7 +1753,6 @@ static struct i2c_driver lm90_driver = { .name = "lm90", }, .probe = lm90_probe, - .remove = lm90_remove, .alert = lm90_alert, .id_table = lm90_id, .detect = lm90_detect, diff --git a/drivers/hwmon/sht3x.c b/drivers/hwmon/sht3x.c new file mode 100644 index 000000000000..b73a48832732 --- /dev/null +++ b/drivers/hwmon/sht3x.c @@ -0,0 +1,775 @@ +/* Sensirion SHT3x-DIS humidity and temperature sensor driver. + * The SHT3x comes in many different versions, this driver is for the + * I2C version only. + * + * Copyright (C) 2016 Sensirion AG, Switzerland + * Author: David Frey <david.frey@sensirion.com> + * Author: Pascal Sachs <pascal.sachs@sensirion.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + */ + +#include <asm/page.h> +#include <linux/crc8.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/hwmon.h> +#include <linux/hwmon-sysfs.h> +#include <linux/i2c.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/jiffies.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/platform_data/sht3x.h> + +/* commands (high precision mode) */ +static const unsigned char sht3x_cmd_measure_blocking_hpm[] = { 0x2c, 0x06 }; +static const unsigned char sht3x_cmd_measure_nonblocking_hpm[] = { 0x24, 0x00 }; + +/* commands (low power mode) */ +static const unsigned char sht3x_cmd_measure_blocking_lpm[] = { 0x2c, 0x10 }; +static const unsigned char sht3x_cmd_measure_nonblocking_lpm[] = { 0x24, 0x16 }; + +/* commands for periodic mode */ +static const unsigned char sht3x_cmd_measure_periodic_mode[] = { 0xe0, 0x00 }; +static const unsigned char sht3x_cmd_break[] = { 0x30, 0x93 }; + +/* commands for heater control */ +static const unsigned char sht3x_cmd_heater_on[] = { 0x30, 0x6d }; +static const unsigned char sht3x_cmd_heater_off[] = { 0x30, 0x66 }; + +/* other commands */ +static const unsigned char sht3x_cmd_read_status_reg[] = { 0xf3, 0x2d }; +static const unsigned char sht3x_cmd_clear_status_reg[] = { 0x30, 0x41 }; + +/* delays for non-blocking i2c commands, both in us */ +#define SHT3X_NONBLOCKING_WAIT_TIME_HPM 15000 +#define SHT3X_NONBLOCKING_WAIT_TIME_LPM 4000 + +#define SHT3X_WORD_LEN 2 +#define SHT3X_CMD_LENGTH 2 +#define SHT3X_CRC8_LEN 1 +#define SHT3X_RESPONSE_LENGTH 6 +#define SHT3X_CRC8_POLYNOMIAL 0x31 +#define SHT3X_CRC8_INIT 0xFF +#define SHT3X_MIN_TEMPERATURE -45000 +#define SHT3X_MAX_TEMPERATURE 130000 +#define SHT3X_MIN_HUMIDITY 0 +#define SHT3X_MAX_HUMIDITY 100000 + +enum sht3x_chips { + sht3x, + sts3x, +}; + +enum sht3x_limits { + limit_max = 0, + limit_max_hyst, + limit_min, + limit_min_hyst, +}; + +DECLARE_CRC8_TABLE(sht3x_crc8_table); + +/* periodic measure commands (high precision mode) */ +static const char periodic_measure_commands_hpm[][SHT3X_CMD_LENGTH] = { + /* 0.5 measurements per second */ + {0x20, 0x32}, + /* 1 measurements per second */ + {0x21, 0x30}, + /* 2 measurements per second */ + {0x22, 0x36}, + /* 4 measurements per second */ + {0x23, 0x34}, + /* 10 measurements per second */ + {0x27, 0x37}, +}; + +/* periodic measure commands (low power mode) */ +static const char periodic_measure_commands_lpm[][SHT3X_CMD_LENGTH] = { + /* 0.5 measurements per second */ + {0x20, 0x2f}, + /* 1 measurements per second */ + {0x21, 0x2d}, + /* 2 measurements per second */ + {0x22, 0x2b}, + /* 4 measurements per second */ + {0x23, 0x29}, + /* 10 measurements per second */ + {0x27, 0x2a}, +}; + +struct sht3x_limit_commands { + const char read_command[SHT3X_CMD_LENGTH]; + const char write_command[SHT3X_CMD_LENGTH]; +}; + +static const struct sht3x_limit_commands limit_commands[] = { + /* temp1_max, humidity1_max */ + [limit_max] = { {0xe1, 0x1f}, {0x61, 0x1d} }, + /* temp_1_max_hyst, humidity1_max_hyst */ + [limit_max_hyst] = { {0xe1, 0x14}, {0x61, 0x16} }, + /* temp1_min, humidity1_min */ + [limit_min] = { {0xe1, 0x02}, {0x61, 0x00} }, + /* temp_1_min_hyst, humidity1_min_hyst */ + [limit_min_hyst] = { {0xe1, 0x09}, {0x61, 0x0B} }, +}; + +#define SHT3X_NUM_LIMIT_CMD ARRAY_SIZE(limit_commands) + +static const u16 mode_to_update_interval[] = { + 0, + 2000, + 1000, + 500, + 250, + 100, +}; + +struct sht3x_data { + struct i2c_client *client; + struct mutex i2c_lock; /* lock for sending i2c commands */ + struct mutex data_lock; /* lock for updating driver data */ + + u8 mode; + const unsigned char *command; + u32 wait_time; /* in us*/ + unsigned long last_update; /* last update in periodic mode*/ + + struct sht3x_platform_data setup; + + /* + * 
cached values for temperature and humidity and limits + * the limits arrays have the following order: + * max, max_hyst, min, min_hyst + */ + int temperature; + int temperature_limits[SHT3X_NUM_LIMIT_CMD]; + u32 humidity; + u32 humidity_limits[SHT3X_NUM_LIMIT_CMD]; +}; + +static u8 get_mode_from_update_interval(u16 value) +{ + size_t index; + u8 number_of_modes = ARRAY_SIZE(mode_to_update_interval); + + if (value == 0) + return 0; + + /* find next faster update interval */ + for (index = 1; index < number_of_modes; index++) { + if (mode_to_update_interval[index] <= value) + return index; + } + + return number_of_modes - 1; +} + +static int sht3x_read_from_command(struct i2c_client *client, + struct sht3x_data *data, + const char *command, + char *buf, int length, u32 wait_time) +{ + int ret; + + mutex_lock(&data->i2c_lock); + ret = i2c_master_send(client, command, SHT3X_CMD_LENGTH); + + if (ret != SHT3X_CMD_LENGTH) { + ret = ret < 0 ? ret : -EIO; + goto out; + } + + if (wait_time) + usleep_range(wait_time, wait_time + 1000); + + ret = i2c_master_recv(client, buf, length); + if (ret != length) { + ret = ret < 0 ? ret : -EIO; + goto out; + } + + ret = 0; +out: + mutex_unlock(&data->i2c_lock); + return ret; +} + +static int sht3x_extract_temperature(u16 raw) +{ + /* + * From datasheet: + * T = -45 + 175 * ST / 2^16 + * Adapted for integer fixed point (3 digit) arithmetic. + */ + return ((21875 * (int)raw) >> 13) - 45000; +} + +static u32 sht3x_extract_humidity(u16 raw) +{ + /* + * From datasheet: + * RH = 100 * SRH / 2^16 + * Adapted for integer fixed point (3 digit) arithmetic. + */ + return (12500 * (u32)raw) >> 13; +} + +static struct sht3x_data *sht3x_update_client(struct device *dev) +{ + struct sht3x_data *data = dev_get_drvdata(dev); + struct i2c_client *client = data->client; + u16 interval_ms = mode_to_update_interval[data->mode]; + unsigned long interval_jiffies = msecs_to_jiffies(interval_ms); + unsigned char buf[SHT3X_RESPONSE_LENGTH]; + u16 val; + int ret = 0; + + mutex_lock(&data->data_lock); + /* + * Only update cached readings once per update interval in periodic + * mode. In single shot mode the sensor measures values on demand, so + * every time the sysfs interface is called, a measurement is triggered. + * In periodic mode however, the measurement process is handled + * internally by the sensor and reading out sensor values only makes + * sense if a new reading is available. 
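The two conversion helpers above are exact milli-unit forms of those datasheet formulas: 175000/65536 reduces to 21875/8192, so T[m°C] = 175000 * ST / 2^16 - 45000 becomes ((21875 * ST) >> 13) - 45000, and 100000/65536 = 12500/8192 for humidity. A standalone sanity check of the arithmetic (plain user-space C, not driver code; the expected values were worked out by hand):

#include <assert.h>
#include <stdint.h>

static int extract_temperature(uint16_t raw)	/* milli-degrees Celsius */
{
	return ((21875 * (int32_t)raw) >> 13) - 45000;
}

static uint32_t extract_humidity(uint16_t raw)	/* milli-percent RH */
{
	return (12500 * (uint32_t)raw) >> 13;
}

int main(void)
{
	assert(extract_temperature(0x0000) == -45000);	/* scale minimum */
	assert(extract_temperature(0xffff) == 129997);	/* ~SHT3X_MAX_TEMPERATURE */
	assert(extract_humidity(0x0000) == 0);
	assert(extract_humidity(0xffff) == 99998);	/* just under 100% */
	/* 0x6666 is ~40% of full scale: about 25 C and 40% RH */
	assert(extract_temperature(0x6666) == 24998);
	assert(extract_humidity(0x6666) == 39999);
	return 0;
}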
+ */ + if (time_after(jiffies, data->last_update + interval_jiffies)) { + ret = sht3x_read_from_command(client, data, data->command, buf, + sizeof(buf), data->wait_time); + if (ret) + goto out; + + val = be16_to_cpup((__be16 *)buf); + data->temperature = sht3x_extract_temperature(val); + val = be16_to_cpup((__be16 *)(buf + 3)); + data->humidity = sht3x_extract_humidity(val); + data->last_update = jiffies; + } + +out: + mutex_unlock(&data->data_lock); + if (ret) + return ERR_PTR(ret); + + return data; +} + +/* sysfs attributes */ +static ssize_t temp1_input_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct sht3x_data *data = sht3x_update_client(dev); + + if (IS_ERR(data)) + return PTR_ERR(data); + + return sprintf(buf, "%d\n", data->temperature); +} + +static ssize_t humidity1_input_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct sht3x_data *data = sht3x_update_client(dev); + + if (IS_ERR(data)) + return PTR_ERR(data); + + return sprintf(buf, "%u\n", data->humidity); +} + +/* + * limits_update must only be called from probe or with data_lock held + */ +static int limits_update(struct sht3x_data *data) +{ + int ret; + u8 index; + int temperature; + u32 humidity; + u16 raw; + char buffer[SHT3X_RESPONSE_LENGTH]; + const struct sht3x_limit_commands *commands; + struct i2c_client *client = data->client; + + for (index = 0; index < SHT3X_NUM_LIMIT_CMD; index++) { + commands = &limit_commands[index]; + ret = sht3x_read_from_command(client, data, + commands->read_command, buffer, + SHT3X_RESPONSE_LENGTH, 0); + + if (ret) + return ret; + + raw = be16_to_cpup((__be16 *)buffer); + temperature = sht3x_extract_temperature((raw & 0x01ff) << 7); + humidity = sht3x_extract_humidity(raw & 0xfe00); + data->temperature_limits[index] = temperature; + data->humidity_limits[index] = humidity; + } + + return ret; +} + +static ssize_t temp1_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct sht3x_data *data = dev_get_drvdata(dev); + u8 index = to_sensor_dev_attr(attr)->index; + int temperature_limit = data->temperature_limits[index]; + + return scnprintf(buf, PAGE_SIZE, "%d\n", temperature_limit); +} + +static ssize_t humidity1_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct sht3x_data *data = dev_get_drvdata(dev); + u8 index = to_sensor_dev_attr(attr)->index; + u32 humidity_limit = data->humidity_limits[index]; + + return scnprintf(buf, PAGE_SIZE, "%u\n", humidity_limit); +} + +/* + * limit_store must only be called with data_lock held + */ +static size_t limit_store(struct device *dev, + size_t count, + u8 index, + int temperature, + u32 humidity) +{ + char buffer[SHT3X_CMD_LENGTH + SHT3X_WORD_LEN + SHT3X_CRC8_LEN]; + char *position = buffer; + int ret; + u16 raw; + struct sht3x_data *data = dev_get_drvdata(dev); + struct i2c_client *client = data->client; + const struct sht3x_limit_commands *commands; + + commands = &limit_commands[index]; + + memcpy(position, commands->write_command, SHT3X_CMD_LENGTH); + position += SHT3X_CMD_LENGTH; + /* + * ST = (T + 45) / 175 * 2^16 + * SRH = RH / 100 * 2^16 + * adapted for fixed point arithmetic and packed the same as + * in limit_show() + */ + raw = ((u32)(temperature + 45000) * 24543) >> (16 + 7); + raw |= ((humidity * 42950) >> 16) & 0xfe00; + + *((__be16 *)position) = cpu_to_be16(raw); + position += SHT3X_WORD_LEN; + *position = crc8(sht3x_crc8_table, + position - SHT3X_WORD_LEN, + SHT3X_WORD_LEN, + SHT3X_CRC8_INIT); + + 
mutex_lock(&data->i2c_lock); + ret = i2c_master_send(client, buffer, sizeof(buffer)); + mutex_unlock(&data->i2c_lock); + + if (ret != sizeof(buffer)) + return ret < 0 ? ret : -EIO; + + data->temperature_limits[index] = temperature; + data->humidity_limits[index] = humidity; + return count; +} + +static ssize_t temp1_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int temperature; + int ret; + struct sht3x_data *data = dev_get_drvdata(dev); + u8 index = to_sensor_dev_attr(attr)->index; + + ret = kstrtoint(buf, 0, &temperature); + if (ret) + return ret; + + temperature = clamp_val(temperature, SHT3X_MIN_TEMPERATURE, + SHT3X_MAX_TEMPERATURE); + mutex_lock(&data->data_lock); + ret = limit_store(dev, count, index, temperature, + data->humidity_limits[index]); + mutex_unlock(&data->data_lock); + + return ret; +} + +static ssize_t humidity1_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + u32 humidity; + int ret; + struct sht3x_data *data = dev_get_drvdata(dev); + u8 index = to_sensor_dev_attr(attr)->index; + + ret = kstrtou32(buf, 0, &humidity); + if (ret) + return ret; + + humidity = clamp_val(humidity, SHT3X_MIN_HUMIDITY, SHT3X_MAX_HUMIDITY); + mutex_lock(&data->data_lock); + ret = limit_store(dev, count, index, data->temperature_limits[index], + humidity); + mutex_unlock(&data->data_lock); + + return ret; +} + +static void sht3x_select_command(struct sht3x_data *data) +{ + /* + * In blocking mode (clock stretching mode) the I2C bus + * is blocked for other traffic, thus the call to i2c_master_recv() + * will wait until the data is ready. For non blocking mode, we + * have to wait ourselves. + */ + if (data->mode > 0) { + data->command = sht3x_cmd_measure_periodic_mode; + data->wait_time = 0; + } else if (data->setup.blocking_io) { + data->command = data->setup.high_precision ? 
+ sht3x_cmd_measure_blocking_hpm : + sht3x_cmd_measure_blocking_lpm; + data->wait_time = 0; + } else { + if (data->setup.high_precision) { + data->command = sht3x_cmd_measure_nonblocking_hpm; + data->wait_time = SHT3X_NONBLOCKING_WAIT_TIME_HPM; + } else { + data->command = sht3x_cmd_measure_nonblocking_lpm; + data->wait_time = SHT3X_NONBLOCKING_WAIT_TIME_LPM; + } + } +} + +static int status_register_read(struct device *dev, + struct device_attribute *attr, + char *buffer, int length) +{ + int ret; + struct sht3x_data *data = dev_get_drvdata(dev); + struct i2c_client *client = data->client; + + ret = sht3x_read_from_command(client, data, sht3x_cmd_read_status_reg, + buffer, length, 0); + + return ret; +} + +static ssize_t temp1_alarm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + char buffer[SHT3X_WORD_LEN + SHT3X_CRC8_LEN]; + int ret; + + ret = status_register_read(dev, attr, buffer, + SHT3X_WORD_LEN + SHT3X_CRC8_LEN); + if (ret) + return ret; + + return scnprintf(buf, PAGE_SIZE, "%d\n", !!(buffer[0] & 0x04)); +} + +static ssize_t humidity1_alarm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + char buffer[SHT3X_WORD_LEN + SHT3X_CRC8_LEN]; + int ret; + + ret = status_register_read(dev, attr, buffer, + SHT3X_WORD_LEN + SHT3X_CRC8_LEN); + if (ret) + return ret; + + return scnprintf(buf, PAGE_SIZE, "%d\n", !!(buffer[0] & 0x08)); +} + +static ssize_t heater_enable_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + char buffer[SHT3X_WORD_LEN + SHT3X_CRC8_LEN]; + int ret; + + ret = status_register_read(dev, attr, buffer, + SHT3X_WORD_LEN + SHT3X_CRC8_LEN); + if (ret) + return ret; + + return scnprintf(buf, PAGE_SIZE, "%d\n", !!(buffer[0] & 0x20)); +} + +static ssize_t heater_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct sht3x_data *data = dev_get_drvdata(dev); + struct i2c_client *client = data->client; + int ret; + bool status; + + ret = kstrtobool(buf, &status); + if (ret) + return ret; + + mutex_lock(&data->i2c_lock); + + if (status) + ret = i2c_master_send(client, (char *)&sht3x_cmd_heater_on, + SHT3X_CMD_LENGTH); + else + ret = i2c_master_send(client, (char *)&sht3x_cmd_heater_off, + SHT3X_CMD_LENGTH); + + mutex_unlock(&data->i2c_lock); + + return ret; +} + +static ssize_t update_interval_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct sht3x_data *data = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", + mode_to_update_interval[data->mode]); +} + +static ssize_t update_interval_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + u16 update_interval; + u8 mode; + int ret; + const char *command; + struct sht3x_data *data = dev_get_drvdata(dev); + struct i2c_client *client = data->client; + + ret = kstrtou16(buf, 0, &update_interval); + if (ret) + return ret; + + mode = get_mode_from_update_interval(update_interval); + + mutex_lock(&data->data_lock); + /* mode did not change */ + if (mode == data->mode) { + mutex_unlock(&data->data_lock); + return count; + } + + mutex_lock(&data->i2c_lock); + /* + * Abort periodic measure mode. + * To do any changes to the configuration while in periodic mode, we + * have to send a break command to the sensor, which then falls back + * to single shot (mode = 0). 
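One detail worth spelling out from limits_update() and limit_store() above: each limit register is a single 16-bit word holding the 7 most significant bits of the humidity value (mask 0xfe00) and the 9 most significant bits of the temperature value (the low 9 bits, re-expanded with << 7 when reading back), so stored limits are deliberately coarse. A standalone round-trip sketch mirroring the driver's fixed-point constants (user-space C, hand-checked values):

#include <stdint.h>
#include <stdio.h>

/* limit_store(): temperature -> 9 MSBs (low bits), humidity -> 7 MSBs */
static uint16_t pack_limit(int temp_mc, uint32_t hum_mp)
{
	uint16_t raw;

	/* 24543 / 2^23 ~= 512 / 175000: scale (T + 45000) mC into 9 bits */
	raw = ((uint32_t)(temp_mc + 45000) * 24543) >> (16 + 7);
	/* 42950 / 2^16 ~= 65536 / 100000: scale RH, keep the top 7 bits */
	raw |= ((hum_mp * 42950) >> 16) & 0xfe00;
	return raw;
}

int main(void)
{
	uint16_t raw = pack_limit(60000, 80000);	/* 60 C, 80% RH */

	/* limits_update(): re-expand to 16-bit readings, then convert */
	int temp = ((21875 * (int32_t)((raw & 0x01ff) << 7)) >> 13) - 45000;
	uint32_t hum = (12500 * (uint32_t)(raw & 0xfe00)) >> 13;

	/* prints roughly 59931 mC and 79687 m%RH: ~0.34 C / ~0.8 %RH steps */
	printf("%d mC, %u m%%RH\n", temp, (unsigned int)hum);
	return 0;
}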
+ */ + if (data->mode > 0) { + ret = i2c_master_send(client, sht3x_cmd_break, + SHT3X_CMD_LENGTH); + if (ret != SHT3X_CMD_LENGTH) + goto out; + data->mode = 0; + } + + if (mode > 0) { + if (data->setup.high_precision) + command = periodic_measure_commands_hpm[mode - 1]; + else + command = periodic_measure_commands_lpm[mode - 1]; + + /* select mode */ + ret = i2c_master_send(client, command, SHT3X_CMD_LENGTH); + if (ret != SHT3X_CMD_LENGTH) + goto out; + } + + /* select mode and command */ + data->mode = mode; + sht3x_select_command(data); + +out: + mutex_unlock(&data->i2c_lock); + mutex_unlock(&data->data_lock); + if (ret != SHT3X_CMD_LENGTH) + return ret < 0 ? ret : -EIO; + + return count; +} + +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, temp1_input_show, NULL, 0); +static SENSOR_DEVICE_ATTR(humidity1_input, S_IRUGO, humidity1_input_show, + NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO | S_IWUSR, + temp1_limit_show, temp1_limit_store, + limit_max); +static SENSOR_DEVICE_ATTR(humidity1_max, S_IRUGO | S_IWUSR, + humidity1_limit_show, humidity1_limit_store, + limit_max); +static SENSOR_DEVICE_ATTR(temp1_max_hyst, S_IRUGO | S_IWUSR, + temp1_limit_show, temp1_limit_store, + limit_max_hyst); +static SENSOR_DEVICE_ATTR(humidity1_max_hyst, S_IRUGO | S_IWUSR, + humidity1_limit_show, humidity1_limit_store, + limit_max_hyst); +static SENSOR_DEVICE_ATTR(temp1_min, S_IRUGO | S_IWUSR, + temp1_limit_show, temp1_limit_store, + limit_min); +static SENSOR_DEVICE_ATTR(humidity1_min, S_IRUGO | S_IWUSR, + humidity1_limit_show, humidity1_limit_store, + limit_min); +static SENSOR_DEVICE_ATTR(temp1_min_hyst, S_IRUGO | S_IWUSR, + temp1_limit_show, temp1_limit_store, + limit_min_hyst); +static SENSOR_DEVICE_ATTR(humidity1_min_hyst, S_IRUGO | S_IWUSR, + humidity1_limit_show, humidity1_limit_store, + limit_min_hyst); +static SENSOR_DEVICE_ATTR(temp1_alarm, S_IRUGO, temp1_alarm_show, NULL, 0); +static SENSOR_DEVICE_ATTR(humidity1_alarm, S_IRUGO, humidity1_alarm_show, + NULL, 0); +static SENSOR_DEVICE_ATTR(heater_enable, S_IRUGO | S_IWUSR, + heater_enable_show, heater_enable_store, 0); +static SENSOR_DEVICE_ATTR(update_interval, S_IRUGO | S_IWUSR, + update_interval_show, update_interval_store, 0); + +static struct attribute *sht3x_attrs[] = { + &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_humidity1_input.dev_attr.attr, + &sensor_dev_attr_temp1_max.dev_attr.attr, + &sensor_dev_attr_temp1_max_hyst.dev_attr.attr, + &sensor_dev_attr_humidity1_max.dev_attr.attr, + &sensor_dev_attr_humidity1_max_hyst.dev_attr.attr, + &sensor_dev_attr_temp1_min.dev_attr.attr, + &sensor_dev_attr_temp1_min_hyst.dev_attr.attr, + &sensor_dev_attr_humidity1_min.dev_attr.attr, + &sensor_dev_attr_humidity1_min_hyst.dev_attr.attr, + &sensor_dev_attr_temp1_alarm.dev_attr.attr, + &sensor_dev_attr_humidity1_alarm.dev_attr.attr, + &sensor_dev_attr_heater_enable.dev_attr.attr, + &sensor_dev_attr_update_interval.dev_attr.attr, + NULL +}; + +static struct attribute *sts3x_attrs[] = { + &sensor_dev_attr_temp1_input.dev_attr.attr, + NULL +}; + +ATTRIBUTE_GROUPS(sht3x); +ATTRIBUTE_GROUPS(sts3x); + +static int sht3x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret; + struct sht3x_data *data; + struct device *hwmon_dev; + struct i2c_adapter *adap = client->adapter; + struct device *dev = &client->dev; + const struct attribute_group **attribute_groups; + + /* + * we require full i2c support since the sht3x uses multi-byte read and + * writes as well as multi-byte commands which are not supported by 
+ * the smbus protocol + */ + if (!i2c_check_functionality(adap, I2C_FUNC_I2C)) + return -ENODEV; + + ret = i2c_master_send(client, sht3x_cmd_clear_status_reg, + SHT3X_CMD_LENGTH); + if (ret != SHT3X_CMD_LENGTH) + return ret < 0 ? ret : -ENODEV; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->setup.blocking_io = false; + data->setup.high_precision = true; + data->mode = 0; + data->last_update = 0; + data->client = client; + crc8_populate_msb(sht3x_crc8_table, SHT3X_CRC8_POLYNOMIAL); + + if (client->dev.platform_data) + data->setup = *(struct sht3x_platform_data *)dev->platform_data; + + sht3x_select_command(data); + + mutex_init(&data->i2c_lock); + mutex_init(&data->data_lock); + + ret = limits_update(data); + if (ret) + return ret; + + if (id->driver_data == sts3x) + attribute_groups = sts3x_groups; + else + attribute_groups = sht3x_groups; + + hwmon_dev = devm_hwmon_device_register_with_groups(dev, + client->name, + data, + attribute_groups); + + if (IS_ERR(hwmon_dev)) + dev_dbg(dev, "unable to register hwmon device\n"); + + return PTR_ERR_OR_ZERO(hwmon_dev); +} + +/* device ID table */ +static const struct i2c_device_id sht3x_ids[] = { + {"sht3x", sht3x}, + {"sts3x", sts3x}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, sht3x_ids); + +static struct i2c_driver sht3x_i2c_driver = { + .driver.name = "sht3x", + .probe = sht3x_probe, + .id_table = sht3x_ids, +}; + +module_i2c_driver(sht3x_i2c_driver); + +MODULE_AUTHOR("David Frey <david.frey@sensirion.com>"); +MODULE_AUTHOR("Pascal Sachs <pascal.sachs@sensirion.com>"); +MODULE_DESCRIPTION("Sensirion SHT3x humidity and temperature sensor driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c index f1e96fd7f445..a942a2574a4d 100644 --- a/drivers/hwmon/tmp102.c +++ b/drivers/hwmon/tmp102.c @@ -11,12 +11,9 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details.
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA */ +#include #include #include #include @@ -27,6 +24,7 @@ #include #include #include +#include <linux/regmap.h> #include #include @@ -50,14 +48,23 @@ #define TMP102_TLOW_REG 0x02 #define TMP102_THIGH_REG 0x03 +#define TMP102_CONFREG_MASK (TMP102_CONF_SD | TMP102_CONF_TM | \ + TMP102_CONF_POL | TMP102_CONF_F0 | \ + TMP102_CONF_F1 | TMP102_CONF_OS | \ + TMP102_CONF_EM | TMP102_CONF_AL | \ + TMP102_CONF_CR0 | TMP102_CONF_CR1) + +#define TMP102_CONFIG_CLEAR (TMP102_CONF_SD | TMP102_CONF_OS | \ + TMP102_CONF_CR0) +#define TMP102_CONFIG_SET (TMP102_CONF_TM | TMP102_CONF_EM | \ + TMP102_CONF_CR1) + +#define CONVERSION_TIME_MS 35 /* in milli-seconds */ + struct tmp102 { - struct i2c_client *client; - struct device *hwmon_dev; - struct mutex lock; + struct regmap *regmap; u16 config_orig; - unsigned long last_update; - int temp[3]; - bool first_time; + unsigned long ready_time; }; /* convert left adjusted 13-bit TMP102 register value to milliCelsius */ @@ -72,44 +79,22 @@ static inline u16 tmp102_mC_to_reg(int val) return (val * 128) / 1000; } -static const u8 tmp102_reg[] = { - TMP102_TEMP_REG, - TMP102_TLOW_REG, - TMP102_THIGH_REG, -}; - -static struct tmp102 *tmp102_update_device(struct device *dev) -{ - struct tmp102 *tmp102 = dev_get_drvdata(dev); - struct i2c_client *client = tmp102->client; - - mutex_lock(&tmp102->lock); - if (time_after(jiffies, tmp102->last_update + HZ / 3)) { - int i; - for (i = 0; i < ARRAY_SIZE(tmp102->temp); ++i) { - int status = i2c_smbus_read_word_swapped(client, - tmp102_reg[i]); - if (status > -1) - tmp102->temp[i] = tmp102_reg_to_mC(status); - } - tmp102->last_update = jiffies; - tmp102->first_time = false; - } - mutex_unlock(&tmp102->lock); - return tmp102; -} - static int tmp102_read_temp(void *dev, int *temp) { - struct tmp102 *tmp102 = tmp102_update_device(dev); + struct tmp102 *tmp102 = dev_get_drvdata(dev); + unsigned int reg; + int ret; - /* Is it too early even to return a conversion? */ - if (tmp102->first_time) { + if (time_before(jiffies, tmp102->ready_time)) { dev_dbg(dev, "%s: Conversion not ready yet..\n", __func__); return -EAGAIN; } - *temp = tmp102->temp[0]; + ret = regmap_read(tmp102->regmap, TMP102_TEMP_REG, &reg); + if (ret < 0) + return ret; + + *temp = tmp102_reg_to_mC(reg); return 0; } @@ -119,13 +104,20 @@ static ssize_t tmp102_show_temp(struct device *dev, char *buf) { struct sensor_device_attribute *sda = to_sensor_dev_attr(attr); - struct tmp102 *tmp102 = tmp102_update_device(dev); + struct tmp102 *tmp102 = dev_get_drvdata(dev); + int regaddr = sda->index; + unsigned int reg; + int err;
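The conversion pair kept above is worth a quick check: in the extended (13-bit) format the s16 register word is the temperature times 128, i.e. 7.8125 m°C per count of the left-adjusted word, while the device's real step is 62.5 m°C since the low three bits read as zero. A standalone round-trip of the shown helpers (user-space C; reg_to_mC here is simply the inverse scaling, ignoring the flag-bit masking the driver performs):

#include <assert.h>
#include <stdint.h>

static uint16_t mC_to_reg(int val)	/* as tmp102_mC_to_reg() above */
{
	return (val * 128) / 1000;
}

static int reg_to_mC(int16_t val)	/* inverse direction */
{
	return ((int32_t)val * 1000) / 128;
}

int main(void)
{
	assert(mC_to_reg(25000) == 3200);	/* 25 C -> 25 * 128 */
	assert(reg_to_mC(3200) == 25000);	/* and back, exactly */
	assert(mC_to_reg(62) == 7);		/* sub-step values truncate */
	assert(reg_to_mC(-2048) == -16000);	/* -16 C, sign preserved */
	return 0;
}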
- /* Is it too early even to return a read? */ - if (tmp102->first_time) + if (regaddr == TMP102_TEMP_REG && + time_before(jiffies, tmp102->ready_time)) return -EAGAIN; - return sprintf(buf, "%d\n", tmp102->temp[sda->index]); + err = regmap_read(tmp102->regmap, regaddr, &reg); + if (err < 0) + return err; + + return sprintf(buf, "%d\n", tmp102_reg_to_mC(reg)); } static ssize_t tmp102_set_temp(struct device *dev, @@ -134,29 +126,26 @@ static ssize_t tmp102_set_temp(struct device *dev, { struct sensor_device_attribute *sda = to_sensor_dev_attr(attr); struct tmp102 *tmp102 = dev_get_drvdata(dev); - struct i2c_client *client = tmp102->client; + int reg = sda->index; long val; - int status; + int err; if (kstrtol(buf, 10, &val) < 0) return -EINVAL; val = clamp_val(val, -256000, 255000); - mutex_lock(&tmp102->lock); - tmp102->temp[sda->index] = val; - status = i2c_smbus_write_word_swapped(client, tmp102_reg[sda->index], - tmp102_mC_to_reg(val)); - mutex_unlock(&tmp102->lock); - return status ? : count; + err = regmap_write(tmp102->regmap, reg, tmp102_mC_to_reg(val)); + return err ? : count; } -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, tmp102_show_temp, NULL , 0); +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, tmp102_show_temp, NULL, + TMP102_TEMP_REG); static SENSOR_DEVICE_ATTR(temp1_max_hyst, S_IWUSR | S_IRUGO, tmp102_show_temp, - tmp102_set_temp, 1); + tmp102_set_temp, TMP102_TLOW_REG); static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, tmp102_show_temp, - tmp102_set_temp, 2); + tmp102_set_temp, TMP102_THIGH_REG); static struct attribute *tmp102_attrs[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, @@ -166,20 +155,46 @@ static struct attribute *tmp102_attrs[] = { }; ATTRIBUTE_GROUPS(tmp102); -#define TMP102_CONFIG (TMP102_CONF_TM | TMP102_CONF_EM | TMP102_CONF_CR1) -#define TMP102_CONFIG_RD_ONLY (TMP102_CONF_R0 | TMP102_CONF_R1 | TMP102_CONF_AL) - static const struct thermal_zone_of_device_ops tmp102_of_thermal_ops = { .get_temp = tmp102_read_temp, }; +static void tmp102_restore_config(void *data) +{ + struct tmp102 *tmp102 = data; + + regmap_write(tmp102->regmap, TMP102_CONF_REG, tmp102->config_orig); +} + +static bool tmp102_is_writeable_reg(struct device *dev, unsigned int reg) +{ + return reg != TMP102_TEMP_REG; +} + +static bool tmp102_is_volatile_reg(struct device *dev, unsigned int reg) +{ + return reg == TMP102_TEMP_REG; +} + +static const struct regmap_config tmp102_regmap_config = { + .reg_bits = 8, + .val_bits = 16, + .max_register = TMP102_THIGH_REG, + .writeable_reg = tmp102_is_writeable_reg, + .volatile_reg = tmp102_is_volatile_reg, + .val_format_endian = REGMAP_ENDIAN_BIG, + .cache_type = REGCACHE_RBTREE, + .use_single_rw = true, +}; + static int tmp102_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct device *dev = &client->dev; struct device *hwmon_dev; struct tmp102 *tmp102; - int status; + unsigned int regval; + int err; if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA)) { @@ -193,73 +208,57 @@ static int tmp102_probe(struct i2c_client *client, return -ENOMEM; i2c_set_clientdata(client, tmp102); - tmp102->client = client; - status = i2c_smbus_read_word_swapped(client, TMP102_CONF_REG); - if (status < 0) { + tmp102->regmap = devm_regmap_init_i2c(client, &tmp102_regmap_config); + if (IS_ERR(tmp102->regmap)) + return PTR_ERR(tmp102->regmap); + + err = regmap_read(tmp102->regmap, TMP102_CONF_REG, &regval); + if (err < 0) { dev_err(dev, "error reading config register\n"); - return status; + return err; } - tmp102->config_orig = status; - status =
i2c_smbus_write_word_swapped(client, TMP102_CONF_REG, - TMP102_CONFIG); - if (status < 0) { - dev_err(dev, "error writing config register\n"); - goto fail_restore_config; + + if ((regval & ~TMP102_CONFREG_MASK) != + (TMP102_CONF_R0 | TMP102_CONF_R1)) { + dev_err(dev, "unexpected config register value\n"); + return -ENODEV; } - status = i2c_smbus_read_word_swapped(client, TMP102_CONF_REG); - if (status < 0) { - dev_err(dev, "error reading config register\n"); - goto fail_restore_config; + + tmp102->config_orig = regval; + + devm_add_action(dev, tmp102_restore_config, tmp102); + + regval &= ~TMP102_CONFIG_CLEAR; + regval |= TMP102_CONFIG_SET; + + err = regmap_write(tmp102->regmap, TMP102_CONF_REG, regval); + if (err < 0) { + dev_err(dev, "error writing config register\n"); + return err; } - status &= ~TMP102_CONFIG_RD_ONLY; - if (status != TMP102_CONFIG) { - dev_err(dev, "config settings did not stick\n"); - status = -ENODEV; - goto fail_restore_config; + + tmp102->ready_time = jiffies; + if (tmp102->config_orig & TMP102_CONF_SD) { + /* + * Mark that we are not ready with data until the first + * conversion is complete + */ + tmp102->ready_time += msecs_to_jiffies(CONVERSION_TIME_MS); } - tmp102->last_update = jiffies; - /* Mark that we are not ready with data until conversion is complete */ - tmp102->first_time = true; - mutex_init(&tmp102->lock); - hwmon_dev = hwmon_device_register_with_groups(dev, client->name, - tmp102, tmp102_groups); + hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, + tmp102, + tmp102_groups); if (IS_ERR(hwmon_dev)) { dev_dbg(dev, "unable to register hwmon device\n"); - status = PTR_ERR(hwmon_dev); - goto fail_restore_config; + return PTR_ERR(hwmon_dev); } - tmp102->hwmon_dev = hwmon_dev; devm_thermal_zone_of_sensor_register(hwmon_dev, 0, hwmon_dev, &tmp102_of_thermal_ops); dev_info(dev, "initialized\n"); - return 0; - -fail_restore_config: - i2c_smbus_write_word_swapped(client, TMP102_CONF_REG, - tmp102->config_orig); - return status; -} - -static int tmp102_remove(struct i2c_client *client) -{ - struct tmp102 *tmp102 = i2c_get_clientdata(client); - - hwmon_device_unregister(tmp102->hwmon_dev); - - /* Stop monitoring if device was stopped originally */ - if (tmp102->config_orig & TMP102_CONF_SD) { - int config; - - config = i2c_smbus_read_word_swapped(client, TMP102_CONF_REG); - if (config >= 0) - i2c_smbus_write_word_swapped(client, TMP102_CONF_REG, - config | TMP102_CONF_SD); - } - return 0; } @@ -267,27 +266,24 @@ static int tmp102_remove(struct i2c_client *client) static int tmp102_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); - int config; - - config = i2c_smbus_read_word_swapped(client, TMP102_CONF_REG); - if (config < 0) - return config; + struct tmp102 *tmp102 = i2c_get_clientdata(client); - config |= TMP102_CONF_SD; - return i2c_smbus_write_word_swapped(client, TMP102_CONF_REG, config); + return regmap_update_bits(tmp102->regmap, TMP102_CONF_REG, + TMP102_CONF_SD, TMP102_CONF_SD); } static int tmp102_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); - int config; + struct tmp102 *tmp102 = i2c_get_clientdata(client); + int err; + + err = regmap_update_bits(tmp102->regmap, TMP102_CONF_REG, + TMP102_CONF_SD, 0); - config = i2c_smbus_read_word_swapped(client, TMP102_CONF_REG); - if (config < 0) - return config; + tmp102->ready_time = jiffies + msecs_to_jiffies(CONVERSION_TIME_MS); - config &= ~TMP102_CONF_SD; - return i2c_smbus_write_word_swapped(client, TMP102_CONF_REG, config); + 
return err; } #endif /* CONFIG_PM */ @@ -303,7 +299,6 @@ static struct i2c_driver tmp102_driver = { .driver.name = DRIVER_NAME, .driver.pm = &tmp102_dev_pm_ops, .probe = tmp102_probe, - .remove = tmp102_remove, .id_table = tmp102_id, }; diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c index ccf4cffe0ee1..eeeed2c7d081 100644 --- a/drivers/hwmon/tmp401.c +++ b/drivers/hwmon/tmp401.c @@ -47,7 +47,7 @@ static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4c, 0x4d, 0x4e, 0x4f, I2C_CLIENT_END }; -enum chips { tmp401, tmp411, tmp431, tmp432, tmp435 }; +enum chips { tmp401, tmp411, tmp431, tmp432, tmp435, tmp461 }; /* * The TMP401 registers, note some registers have different addresses for @@ -62,31 +62,34 @@ enum chips { tmp401, tmp411, tmp431, tmp432, tmp435 }; #define TMP401_MANUFACTURER_ID_REG 0xFE #define TMP401_DEVICE_ID_REG 0xFF -static const u8 TMP401_TEMP_MSB_READ[6][2] = { +static const u8 TMP401_TEMP_MSB_READ[7][2] = { { 0x00, 0x01 }, /* temp */ { 0x06, 0x08 }, /* low limit */ { 0x05, 0x07 }, /* high limit */ { 0x20, 0x19 }, /* therm (crit) limit */ { 0x30, 0x34 }, /* lowest */ { 0x32, 0x36 }, /* highest */ + { 0, 0x11 }, /* offset */ }; -static const u8 TMP401_TEMP_MSB_WRITE[6][2] = { +static const u8 TMP401_TEMP_MSB_WRITE[7][2] = { { 0, 0 }, /* temp (unused) */ { 0x0C, 0x0E }, /* low limit */ { 0x0B, 0x0D }, /* high limit */ { 0x20, 0x19 }, /* therm (crit) limit */ { 0x30, 0x34 }, /* lowest */ { 0x32, 0x36 }, /* highest */ + { 0, 0x11 }, /* offset */ }; -static const u8 TMP401_TEMP_LSB[6][2] = { +static const u8 TMP401_TEMP_LSB[7][2] = { { 0x15, 0x10 }, /* temp */ { 0x17, 0x14 }, /* low limit */ { 0x16, 0x13 }, /* high limit */ { 0, 0 }, /* therm (crit) limit (unused) */ { 0x31, 0x35 }, /* lowest */ { 0x33, 0x37 }, /* highest */ + { 0, 0x12 }, /* offset */ }; static const u8 TMP432_TEMP_MSB_READ[4][3] = { @@ -149,6 +152,7 @@ static const struct i2c_device_id tmp401_id[] = { { "tmp431", tmp431 }, { "tmp432", tmp432 }, { "tmp435", tmp435 }, + { "tmp461", tmp461 }, { } }; MODULE_DEVICE_TABLE(i2c, tmp401_id); @@ -170,7 +174,7 @@ struct tmp401_data { /* register values */ u8 status[4]; u8 config; - u16 temp[6][3]; + u16 temp[7][3]; u8 temp_crit_hyst; }; @@ -612,6 +616,22 @@ static const struct attribute_group tmp432_group = { .attrs = tmp432_attributes, }; +/* + * Additional features of the TMP461 chip. + * The TMP461 temperature offset for the remote channel. 
+ */ +static SENSOR_DEVICE_ATTR_2(temp2_offset, S_IWUSR | S_IRUGO, show_temp, + store_temp, 6, 1); + +static struct attribute *tmp461_attributes[] = { + &sensor_dev_attr_temp2_offset.dev_attr.attr, + NULL +}; + +static const struct attribute_group tmp461_group = { + .attrs = tmp461_attributes, +}; + /* * Begin non sysfs callback code (aka Real code) */ @@ -714,7 +734,7 @@ static int tmp401_probe(struct i2c_client *client, const struct i2c_device_id *id) { static const char * const names[] = { - "TMP401", "TMP411", "TMP431", "TMP432", "TMP435" + "TMP401", "TMP411", "TMP431", "TMP432", "TMP435", "TMP461" }; struct device *dev = &client->dev; struct device *hwmon_dev; @@ -745,6 +765,9 @@ static int tmp401_probe(struct i2c_client *client, if (data->kind == tmp432) data->groups[groups++] = &tmp432_group; + if (data->kind == tmp461) + data->groups[groups++] = &tmp461_group; + hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, data, data->groups); if (IS_ERR(hwmon_dev)) diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c index 1be543e8e42f..6f0a51a2c6ec 100644 --- a/drivers/hwtracing/intel_th/core.c +++ b/drivers/hwtracing/intel_th/core.c @@ -23,6 +23,7 @@ #include <linux/debugfs.h> #include <linux/idr.h> #include <linux/pci.h> +#include <linux/pm_runtime.h> #include <linux/dma-mapping.h> #include "intel_th.h" @@ -67,23 +68,33 @@ static int intel_th_probe(struct device *dev) hubdrv = to_intel_th_driver(hub->dev.driver); + pm_runtime_set_active(dev); + pm_runtime_no_callbacks(dev); + pm_runtime_enable(dev); + ret = thdrv->probe(to_intel_th_device(dev)); if (ret) - return ret; + goto out_pm; if (thdrv->attr_group) { ret = sysfs_create_group(&thdev->dev.kobj, thdrv->attr_group); - if (ret) { - thdrv->remove(thdev); - - return ret; - } + if (ret) + goto out; } if (thdev->type == INTEL_TH_OUTPUT && !intel_th_output_assigned(thdev)) + /* does not talk to hardware */ ret = hubdrv->assign(hub, thdev); +out: + if (ret) + thdrv->remove(thdev); + +out_pm: + if (ret) + pm_runtime_disable(dev); + return ret; } @@ -103,6 +114,8 @@ static int intel_th_remove(struct device *dev) if (thdrv->attr_group) sysfs_remove_group(&thdev->dev.kobj, thdrv->attr_group); + pm_runtime_get_sync(dev); + thdrv->remove(thdev); if (intel_th_output_assigned(thdev)) { @@ -110,9 +123,14 @@ static int intel_th_remove(struct device *dev) to_intel_th_driver(dev->parent->driver); if (hub->dev.driver) + /* does not talk to hardware */ hubdrv->unassign(hub, thdev); } + pm_runtime_disable(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + return 0; } @@ -185,6 +203,7 @@ static int intel_th_output_activate(struct intel_th_device *thdev) { struct intel_th_driver *thdrv = to_intel_th_driver_or_null(thdev->dev.driver); + int ret = 0; if (!thdrv) return -ENODEV; @@ -192,12 +211,17 @@ static int intel_th_output_activate(struct intel_th_device *thdev) if (!try_module_get(thdrv->driver.owner)) return -ENODEV; + pm_runtime_get_sync(&thdev->dev); + if (thdrv->activate) - return thdrv->activate(thdev); + ret = thdrv->activate(thdev); + else + intel_th_trace_enable(thdev); - intel_th_trace_enable(thdev); + if (ret) + pm_runtime_put(&thdev->dev); - return 0; + return ret; } static void intel_th_output_deactivate(struct intel_th_device *thdev) @@ -213,6 +237,7 @@ static void intel_th_output_deactivate(struct intel_th_device *thdev) else intel_th_trace_disable(thdev); + pm_runtime_put(&thdev->dev); module_put(thdrv->driver.owner); } @@ -465,6 +490,38 @@ static struct intel_th_subdevice { }, }; +#ifdef CONFIG_MODULES +static void __intel_th_request_hub_module(struct work_struct
*work) +{ + struct intel_th *th = container_of(work, struct intel_th, + request_module_work); + + request_module("intel_th_%s", th->hub->name); +} + +static int intel_th_request_hub_module(struct intel_th *th) +{ + INIT_WORK(&th->request_module_work, __intel_th_request_hub_module); + schedule_work(&th->request_module_work); + + return 0; +} + +static void intel_th_request_hub_module_flush(struct intel_th *th) +{ + flush_work(&th->request_module_work); +} +#else +static inline int intel_th_request_hub_module(struct intel_th *th) +{ + return -EINVAL; +} + +static inline void intel_th_request_hub_module_flush(struct intel_th *th) +{ +} +#endif /* CONFIG_MODULES */ + static int intel_th_populate(struct intel_th *th, struct resource *devres, unsigned int ndevres, int irq) { @@ -535,7 +592,7 @@ static int intel_th_populate(struct intel_th *th, struct resource *devres, /* need switch driver to be loaded to enumerate the rest */ if (subdev->type == INTEL_TH_SWITCH && !req) { th->hub = thdev; - err = request_module("intel_th_%s", subdev->name); + err = intel_th_request_hub_module(th); if (!err) req++; } @@ -628,6 +685,10 @@ intel_th_alloc(struct device *dev, struct resource *devres, dev_set_drvdata(dev, th); + pm_runtime_no_callbacks(dev); + pm_runtime_put(dev); + pm_runtime_allow(dev); + err = intel_th_populate(th, devres, ndevres, irq); if (err) goto err_chrdev; @@ -635,6 +696,8 @@ intel_th_alloc(struct device *dev, struct resource *devres, return th; err_chrdev: + pm_runtime_forbid(dev); + __unregister_chrdev(th->major, 0, TH_POSSIBLE_OUTPUTS, "intel_th/output"); @@ -652,12 +715,16 @@ void intel_th_free(struct intel_th *th) { int i; + intel_th_request_hub_module_flush(th); for (i = 0; i < TH_SUBDEVICE_MAX; i++) if (th->thdev[i] != th->hub) intel_th_device_remove(th->thdev[i]); intel_th_device_remove(th->hub); + pm_runtime_get_sync(th->dev); + pm_runtime_forbid(th->dev); + __unregister_chrdev(th->major, 0, TH_POSSIBLE_OUTPUTS, "intel_th/output"); @@ -682,6 +749,7 @@ int intel_th_trace_enable(struct intel_th_device *thdev) if (WARN_ON_ONCE(thdev->type != INTEL_TH_OUTPUT)) return -EINVAL; + pm_runtime_get_sync(&thdev->dev); hubdrv->enable(hub, &thdev->output); return 0; @@ -702,6 +770,7 @@ int intel_th_trace_disable(struct intel_th_device *thdev) return -EINVAL; hubdrv->disable(hub, &thdev->output); + pm_runtime_put(&thdev->dev); return 0; } diff --git a/drivers/hwtracing/intel_th/gth.c b/drivers/hwtracing/intel_th/gth.c index 9beea0b54231..33e09369a491 100644 --- a/drivers/hwtracing/intel_th/gth.c +++ b/drivers/hwtracing/intel_th/gth.c @@ -22,6 +22,7 @@ #include #include #include +#include <linux/pm_runtime.h> #include "intel_th.h" #include "gth.h" @@ -190,6 +191,11 @@ static ssize_t master_attr_store(struct device *dev, if (old_port >= 0) { gth->master[ma->master] = -1; clear_bit(ma->master, gth->output[old_port].master); + + /* + * if the port is active, program this setting, + * implies that runtime PM is on + */ if (gth->output[old_port].output->active) gth_master_set(gth, ma->master, -1); } @@ -204,7 +210,7 @@ static ssize_t master_attr_store(struct device *dev, set_bit(ma->master, gth->output[port].master); - /* if the port is active, program this setting */ + /* if the port is active, program this setting, see above */ if (gth->output[port].output->active) gth_master_set(gth, ma->master, port); } @@ -326,11 +332,15 @@ static ssize_t output_attr_show(struct device *dev, struct gth_device *gth = oa->gth; size_t count; + pm_runtime_get_sync(dev); + spin_lock(&gth->gth_lock); count = snprintf(buf, PAGE_SIZE, "%x\n",
gth_output_parm_get(gth, oa->port, oa->parm)); spin_unlock(&gth->gth_lock); + pm_runtime_put(dev); + return count; } @@ -346,10 +356,14 @@ static ssize_t output_attr_store(struct device *dev, if (kstrtouint(buf, 16, &config) < 0) return -EINVAL; + pm_runtime_get_sync(dev); + spin_lock(&gth->gth_lock); gth_output_parm_set(gth, oa->port, oa->parm, config); spin_unlock(&gth->gth_lock); + pm_runtime_put(dev); + return count; } @@ -451,7 +465,7 @@ static int intel_th_output_attributes(struct gth_device *gth) } /** - * intel_th_gth_disable() - enable tracing to an output device + * intel_th_gth_disable() - disable tracing to an output device * @thdev: GTH device * @output: output device's descriptor * diff --git a/drivers/hwtracing/intel_th/intel_th.h b/drivers/hwtracing/intel_th/intel_th.h index 0df22e30673d..4c195786bf1f 100644 --- a/drivers/hwtracing/intel_th/intel_th.h +++ b/drivers/hwtracing/intel_th/intel_th.h @@ -114,6 +114,9 @@ intel_th_output_assigned(struct intel_th_device *thdev) * @unassign: deassociate an output type device from an output port * @enable: enable tracing for a given output device * @disable: disable tracing for a given output device + * @irq: interrupt callback + * @activate: enable tracing on the output's side + * @deactivate: disable tracing on the output's side * @fops: file operations for device nodes * @attr_group: attributes provided by the driver * @@ -205,6 +208,9 @@ struct intel_th { int id; int major; +#ifdef CONFIG_MODULES + struct work_struct request_module_work; +#endif /* CONFIG_MODULES */ #ifdef CONFIG_INTEL_TH_DEBUG struct dentry *dbg; #endif diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 5e25c7eb31d3..0bba3842336e 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -80,6 +80,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x1a8e), .driver_data = (kernel_ulong_t)0, }, + { + /* Kaby Lake PCH-H */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa2a6), + .driver_data = (kernel_ulong_t)0, + }, { 0 }, };
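The intel_th changes above and the stm changes below follow one discipline: every path that is about to touch the hardware takes a runtime-PM reference (pm_runtime_get_sync()), and every exit path, including errors, drops it, with the stm character-device paths using the delayed autosuspend variants to absorb bursts of writes. A reduced sketch of the pairing (hypothetical example_* functions):

#include <linux/device.h>
#include <linux/pm_runtime.h>

static int example_activate(struct device *dev)
{
	int ret;

	ret = pm_runtime_get_sync(dev);		/* resume and take a reference */
	if (ret < 0) {
		pm_runtime_put_noidle(dev);	/* get_sync counts even on failure */
		return ret;
	}

	ret = 0;	/* ... program the hardware here ... */
	if (ret)
		pm_runtime_put(dev);		/* balance the reference on error */

	return ret;
}

static void example_deactivate(struct device *dev)
{
	/* ... stop the hardware ... */
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);	/* suspend after a delay, as stm does */
}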
diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index ff31108b066f..51f81d64ca37 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -15,6 +15,7 @@ * as defined in MIPI STPv2 specification. */ +#include <linux/pm_runtime.h> #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/module.h> @@ -482,14 +483,40 @@ static ssize_t stm_char_write(struct file *file, const char __user *buf, return -EFAULT; } + pm_runtime_get_sync(&stm->dev); + count = stm_write(stm->data, stmf->output.master, stmf->output.channel, kbuf, count); + pm_runtime_mark_last_busy(&stm->dev); + pm_runtime_put_autosuspend(&stm->dev); kfree(kbuf); return count; } +static void stm_mmap_open(struct vm_area_struct *vma) +{ + struct stm_file *stmf = vma->vm_file->private_data; + struct stm_device *stm = stmf->stm; + + pm_runtime_get(&stm->dev); +} + +static void stm_mmap_close(struct vm_area_struct *vma) +{ + struct stm_file *stmf = vma->vm_file->private_data; + struct stm_device *stm = stmf->stm; + + pm_runtime_mark_last_busy(&stm->dev); + pm_runtime_put_autosuspend(&stm->dev); +} + +static const struct vm_operations_struct stm_mmap_vmops = { + .open = stm_mmap_open, + .close = stm_mmap_close, +}; + static int stm_char_mmap(struct file *file, struct vm_area_struct *vma) { struct stm_file *stmf = file->private_data; @@ -514,8 +541,11 @@ static int stm_char_mmap(struct file *file, struct vm_area_struct *vma) if (!phys) return -EINVAL; + pm_runtime_get_sync(&stm->dev); + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_ops = &stm_mmap_vmops; vm_iomap_memory(vma, phys, size); return 0; @@ -701,6 +731,17 @@ int stm_register_device(struct device *parent, struct stm_data *stm_data, if (err) goto err_device; + /* + * Use delayed autosuspend to avoid bouncing back and forth + * on recurring character device writes, with the initial + * delay time of 2 seconds. + */ + pm_runtime_no_callbacks(&stm->dev); + pm_runtime_use_autosuspend(&stm->dev); + pm_runtime_set_autosuspend_delay(&stm->dev, 2000); + pm_runtime_set_suspended(&stm->dev); + pm_runtime_enable(&stm->dev); + return 0; err_device: @@ -724,6 +765,9 @@ void stm_unregister_device(struct stm_data *stm_data) struct stm_source_device *src, *iter; int i, ret; + pm_runtime_dont_use_autosuspend(&stm->dev); + pm_runtime_disable(&stm->dev); + mutex_lock(&stm->link_mutex); list_for_each_entry_safe(src, iter, &stm->link_list, link_entry) { ret = __stm_source_link_drop(src, stm); @@ -878,6 +922,8 @@ static int __stm_source_link_drop(struct stm_source_device *src, stm_output_free(link, &src->output); list_del_init(&src->link_entry); + pm_runtime_mark_last_busy(&link->dev); + pm_runtime_put_autosuspend(&link->dev); /* matches stm_find_device() from stm_source_link_store() */ stm_put_device(link); rcu_assign_pointer(src->link, NULL); @@ -971,8 +1017,11 @@ static ssize_t stm_source_link_store(struct device *dev, if (!link) return -EINVAL; + pm_runtime_get(&link->dev); + err = stm_source_link_add(src, link); if (err) { + pm_runtime_put_autosuspend(&link->dev); /* matches the stm_find_device() above */ stm_put_device(link); } @@ -1033,6 +1082,9 @@ int stm_source_register_device(struct device *parent, if (err) goto err; + pm_runtime_no_callbacks(&src->dev); + pm_runtime_forbid(&src->dev); + err = device_add(&src->dev); if (err) goto err; diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index cc6439ab3f71..041050edd809 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -1268,6 +1268,8 @@ static int qup_i2c_xfer_v2(struct i2c_adapter *adap, } } + idx = 0; + do { if (msgs[idx].len == 0) { ret = -EINVAL; diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 445398c314a3..b126dbaa47e3 100644 ---
a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -912,7 +912,7 @@ static int tegra_i2c_probe(struct platform_device *pdev) ret = tegra_i2c_init(i2c_dev); if (ret) { dev_err(&pdev->dev, "Failed to initialize i2c controller"); - goto unprepare_div_clk; + goto disable_div_clk; } ret = devm_request_irq(&pdev->dev, i2c_dev->irq, diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c index e33022e2d459..6e5fac6a5262 100644 --- a/drivers/i2c/i2c-boardinfo.c +++ b/drivers/i2c/i2c-boardinfo.c @@ -56,9 +56,7 @@ EXPORT_SYMBOL_GPL(__i2c_first_dynamic_bus_num); * The board info passed can safely be __initdata, but be careful of embedded * pointers (for platform_data, functions, etc) since that won't be copied. */ -int __init -i2c_register_board_info(int busnum, - struct i2c_board_info const *info, unsigned len) +int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned len) { int status; diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index af11b658984d..74e5aeaf84f9 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -107,12 +107,11 @@ struct acpi_i2c_lookup { acpi_handle device_handle; }; -static int acpi_i2c_find_address(struct acpi_resource *ares, void *data) +static int acpi_i2c_fill_info(struct acpi_resource *ares, void *data) { struct acpi_i2c_lookup *lookup = data; struct i2c_board_info *info = lookup->info; struct acpi_resource_i2c_serialbus *sb; - acpi_handle adapter_handle; acpi_status status; if (info->addr || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) @@ -122,80 +121,102 @@ static int acpi_i2c_find_address(struct acpi_resource *ares, void *data) if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) return 1; - /* - * Extract the ResourceSource and make sure that the handle matches - * with the I2C adapter handle. - */ status = acpi_get_handle(lookup->device_handle, sb->resource_source.string_ptr, - &adapter_handle); - if (ACPI_SUCCESS(status) && adapter_handle == lookup->adapter_handle) { - info->addr = sb->slave_address; - if (sb->access_mode == ACPI_I2C_10BIT_MODE) - info->flags |= I2C_CLIENT_TEN; - } + &lookup->adapter_handle); + if (!ACPI_SUCCESS(status)) + return 1; + + info->addr = sb->slave_address; + if (sb->access_mode == ACPI_I2C_10BIT_MODE) + info->flags |= I2C_CLIENT_TEN; return 1; } -static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level, - void *data, void **return_value) +static int acpi_i2c_get_info(struct acpi_device *adev, + struct i2c_board_info *info, + acpi_handle *adapter_handle) { - struct i2c_adapter *adapter = data; struct list_head resource_list; - struct acpi_i2c_lookup lookup; struct resource_entry *entry; - struct i2c_board_info info; - struct acpi_device *adev; + struct acpi_i2c_lookup lookup; int ret; - if (acpi_bus_get_device(handle, &adev)) - return AE_OK; - if (acpi_bus_get_status(adev) || !adev->status.present) - return AE_OK; + if (acpi_bus_get_status(adev) || !adev->status.present || + acpi_device_enumerated(adev)) + return -EINVAL; - memset(&info, 0, sizeof(info)); - info.fwnode = acpi_fwnode_handle(adev); + memset(info, 0, sizeof(*info)); + info->fwnode = acpi_fwnode_handle(adev); memset(&lookup, 0, sizeof(lookup)); - lookup.adapter_handle = ACPI_HANDLE(&adapter->dev); - lookup.device_handle = handle; - lookup.info = &info; + lookup.device_handle = acpi_device_handle(adev); + lookup.info = info; - /* - * Look up for I2cSerialBus resource with ResourceSource that - * matches with this adapter. 
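The refactor above splits device creation into acpi_i2c_get_info(), which parses the I2cSerialBus resource and IRQ, and acpi_i2c_register_device(), so that the same code serves both the boot-time namespace walk and the table-load notifier introduced further down; a client created for a freshly loaded table is matched to its adapter by the resolved ResourceSource handle. The notifier half is the stock kernel pattern; a minimal sketch (hypothetical example_* names, using the acpi_reconfig_notifier_* interface this series hooks into):

#include <linux/acpi.h>
#include <linux/notifier.h>

static int example_acpi_notify(struct notifier_block *nb, unsigned long value,
			       void *arg)
{
	struct acpi_device *adev = arg;

	switch (value) {
	case ACPI_RECONFIG_DEVICE_ADD:
		/* parse adev's resources and create the bus device */
		break;
	case ACPI_RECONFIG_DEVICE_REMOVE:
		if (!acpi_device_enumerated(adev))
			break;
		/* look up and unregister the device created for adev */
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block example_acpi_nb = {
	.notifier_call = example_acpi_notify,
};

/* registered once at subsystem init, unregistered on exit:
 *	acpi_reconfig_notifier_register(&example_acpi_nb);
 *	acpi_reconfig_notifier_unregister(&example_acpi_nb);
 */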
- */ + /* Look up for I2cSerialBus resource */ INIT_LIST_HEAD(&resource_list); ret = acpi_dev_get_resources(adev, &resource_list, - acpi_i2c_find_address, &lookup); + acpi_i2c_fill_info, &lookup); acpi_dev_free_resource_list(&resource_list); - if (ret < 0 || !info.addr) - return AE_OK; + if (ret < 0 || !info->addr) + return -EINVAL; + + *adapter_handle = lookup.adapter_handle; /* Then fill IRQ number if any */ ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL); if (ret < 0) - return AE_OK; + return -EINVAL; resource_list_for_each_entry(entry, &resource_list) { if (resource_type(entry->res) == IORESOURCE_IRQ) { - info.irq = entry->res->start; + info->irq = entry->res->start; break; } } acpi_dev_free_resource_list(&resource_list); + strlcpy(info->type, dev_name(&adev->dev), sizeof(info->type)); + + return 0; +} + +static void acpi_i2c_register_device(struct i2c_adapter *adapter, + struct acpi_device *adev, + struct i2c_board_info *info) +{ adev->power.flags.ignore_parent = true; - strlcpy(info.type, dev_name(&adev->dev), sizeof(info.type)); - if (!i2c_new_device(adapter, &info)) { + acpi_device_set_enumerated(adev); + + if (!i2c_new_device(adapter, info)) { adev->power.flags.ignore_parent = false; dev_err(&adapter->dev, "failed to add I2C device %s from ACPI\n", dev_name(&adev->dev)); } +} + +static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level, + void *data, void **return_value) +{ + struct i2c_adapter *adapter = data; + struct acpi_device *adev; + acpi_handle adapter_handle; + struct i2c_board_info info; + + if (acpi_bus_get_device(handle, &adev)) + return AE_OK; + + if (acpi_i2c_get_info(adev, &info, &adapter_handle)) + return AE_OK; + + if (adapter_handle != ACPI_HANDLE(&adapter->dev)) + return AE_OK; + + acpi_i2c_register_device(adapter, adev, &info); return AE_OK; } @@ -225,8 +246,80 @@ static void acpi_i2c_register_devices(struct i2c_adapter *adap) dev_warn(&adap->dev, "failed to enumerate I2C slaves\n"); } +static int acpi_i2c_match_adapter(struct device *dev, void *data) +{ + struct i2c_adapter *adapter = i2c_verify_adapter(dev); + + if (!adapter) + return 0; + + return ACPI_HANDLE(dev) == (acpi_handle)data; +} + +static int acpi_i2c_match_device(struct device *dev, void *data) +{ + return ACPI_COMPANION(dev) == data; +} + +static struct i2c_adapter *acpi_i2c_find_adapter_by_handle(acpi_handle handle) +{ + struct device *dev; + + dev = bus_find_device(&i2c_bus_type, NULL, handle, + acpi_i2c_match_adapter); + return dev ? i2c_verify_adapter(dev) : NULL; +} + +static struct i2c_client *acpi_i2c_find_client_by_adev(struct acpi_device *adev) +{ + struct device *dev; + + dev = bus_find_device(&i2c_bus_type, NULL, adev, acpi_i2c_match_device); + return dev ? 
i2c_verify_client(dev) : NULL; +} + +static int acpi_i2c_notify(struct notifier_block *nb, unsigned long value, + void *arg) +{ + struct acpi_device *adev = arg; + struct i2c_board_info info; + acpi_handle adapter_handle; + struct i2c_adapter *adapter; + struct i2c_client *client; + + switch (value) { + case ACPI_RECONFIG_DEVICE_ADD: + if (acpi_i2c_get_info(adev, &info, &adapter_handle)) + break; + + adapter = acpi_i2c_find_adapter_by_handle(adapter_handle); + if (!adapter) + break; + + acpi_i2c_register_device(adapter, adev, &info); + break; + case ACPI_RECONFIG_DEVICE_REMOVE: + if (!acpi_device_enumerated(adev)) + break; + + client = acpi_i2c_find_client_by_adev(adev); + if (!client) + break; + + i2c_unregister_device(client); + put_device(&client->dev); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block i2c_acpi_notifier = { + .notifier_call = acpi_i2c_notify, +}; #else /* CONFIG_ACPI */ static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) { } +extern struct notifier_block i2c_acpi_notifier; #endif /* CONFIG_ACPI */ #ifdef CONFIG_ACPI_I2C_OPREGION @@ -1089,6 +1182,8 @@ void i2c_unregister_device(struct i2c_client *client) { if (client->dev.of_node) of_node_clear_flag(client->dev.of_node, OF_POPULATED); + if (ACPI_COMPANION(&client->dev)) + acpi_device_clear_enumerated(ACPI_COMPANION(&client->dev)); device_unregister(&client->dev); } EXPORT_SYMBOL_GPL(i2c_unregister_device); @@ -2117,6 +2212,8 @@ static int __init i2c_init(void) if (IS_ENABLED(CONFIG_OF_DYNAMIC)) WARN_ON(of_reconfig_notifier_register(&i2c_of_notifier)); + if (IS_ENABLED(CONFIG_ACPI)) + WARN_ON(acpi_reconfig_notifier_register(&i2c_acpi_notifier)); return 0; @@ -2132,6 +2229,8 @@ bus_err: static void __exit i2c_exit(void) { + if (IS_ENABLED(CONFIG_ACPI)) + WARN_ON(acpi_reconfig_notifier_unregister(&i2c_acpi_notifier)); if (IS_ENABLED(CONFIG_OF_DYNAMIC)) WARN_ON(of_reconfig_notifier_unregister(&i2c_of_notifier)); i2c_del_driver(&dummy_driver); diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c index 26e7c5187a58..c6a90b4a9c62 100644 --- a/drivers/i2c/muxes/i2c-mux-reg.c +++ b/drivers/i2c/muxes/i2c-mux-reg.c @@ -145,7 +145,7 @@ static int i2c_mux_reg_probe_dt(struct regmux *mux, mux->data.idle_in_use = true; /* map address from "reg" if exists */ - if (of_address_to_resource(np, 0, &res)) { + if (of_address_to_resource(np, 0, &res) == 0) { mux->data.reg_size = resource_size(&res); mux->data.reg = devm_ioremap_resource(&pdev->dev, &res); if (IS_ERR(mux->data.reg)) diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index ef907fd5ba98..bf9a2ad296ed 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1770,7 +1770,6 @@ static int ide_cd_probe(ide_drive_t *drive) drive->driver_data = info; g->minors = 1; - g->driverfs_dev = &drive->gendev; g->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE; if (ide_cdrom_setup(drive)) { put_device(&info->dev); @@ -1780,7 +1779,7 @@ static int ide_cd_probe(ide_drive_t *drive) ide_cd_read_toc(drive, &sense); g->fops = &idecd_ops; g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - add_disk(g); + device_add_disk(&drive->gendev, g); return 0; out_free_disk: diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index 474173eb31bb..5887a7a09e37 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ b/drivers/ide/ide-cd_ioctl.c @@ -459,9 +459,6 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi, layer. the packet must be complete, as we do not touch it at all. 
*/ - if (cgc->data_direction == CGC_DATA_WRITE) - flags |= REQ_WRITE; - if (cgc->sense) memset(cgc->sense, 0, sizeof(struct request_sense)); diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 5ceb176dc148..83679da0c3f0 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -431,7 +431,7 @@ static int idedisk_prep_fn(struct request_queue *q, struct request *rq) ide_drive_t *drive = q->queuedata; struct ide_cmd *cmd; - if (!(rq->cmd_flags & REQ_FLUSH)) + if (req_op(rq) != REQ_OP_FLUSH) return BLKPREP_OK; if (rq->special) { diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 2fb5350c5410..f079d8d1d856 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -206,7 +206,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive, memcpy(rq->cmd, pc->c, 12); pc->rq = rq; - if (rq->cmd_flags & REQ_WRITE) + if (cmd == WRITE) pc->flags |= PC_FLAG_WRITING; pc->flags |= PC_FLAG_DMA_OK; diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 838996a0039e..e823394ed543 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -412,12 +412,11 @@ static int ide_gd_probe(ide_drive_t *drive) set_capacity(g, ide_gd_capacity(drive)); g->minors = IDE_DISK_MINORS; - g->driverfs_dev = &drive->gendev; g->flags |= GENHD_FL_EXT_DEVT; if (drive->dev_flags & IDE_DFLAG_REMOVABLE) g->flags = GENHD_FL_REMOVABLE; g->fops = &ide_gd_ops; - add_disk(g); + device_add_disk(&drive->gendev, g); return 0; out_free_disk: diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index c96649292b55..9b2ef248788d 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -46,8 +46,6 @@ * to avoid complications with the lapic timer workaround. * Have not seen issues with suspend, but may need same workaround here. * - * There is currently no kernel-based automatic probing/loading mechanism - * if the driver is built as a module. 
*/ /* un-comment DEBUG to enable pr_debug() statements */ @@ -60,8 +58,9 @@ #include #include #include -#include +#include #include +#include #include #include @@ -827,6 +826,35 @@ static struct cpuidle_state bxt_cstates[] = { .enter = NULL } }; +static struct cpuidle_state dnv_cstates[] = { + { + .name = "C1-DNV", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, + { + .name = "C1E-DNV", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01), + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, + { + .name = "C6-DNV", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 50, + .target_residency = 500, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, + { + .enter = NULL } +}; + /** * intel_idle * @dev: cpuidle_device @@ -1016,45 +1044,50 @@ static const struct idle_cpu idle_cpu_bxt = { .disable_promotion_to_c1e = true, }; +static const struct idle_cpu idle_cpu_dnv = { + .state_table = dnv_cstates, + .disable_promotion_to_c1e = true, +}; + #define ICPU(model, cpu) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu } static const struct x86_cpu_id intel_idle_ids[] __initconst = { - ICPU(0x1a, idle_cpu_nehalem), - ICPU(0x1e, idle_cpu_nehalem), - ICPU(0x1f, idle_cpu_nehalem), - ICPU(0x25, idle_cpu_nehalem), - ICPU(0x2c, idle_cpu_nehalem), - ICPU(0x2e, idle_cpu_nehalem), - ICPU(0x1c, idle_cpu_atom), - ICPU(0x26, idle_cpu_lincroft), - ICPU(0x2f, idle_cpu_nehalem), - ICPU(0x2a, idle_cpu_snb), - ICPU(0x2d, idle_cpu_snb), - ICPU(0x36, idle_cpu_atom), - ICPU(0x37, idle_cpu_byt), - ICPU(0x4c, idle_cpu_cht), - ICPU(0x3a, idle_cpu_ivb), - ICPU(0x3e, idle_cpu_ivt), - ICPU(0x3c, idle_cpu_hsw), - ICPU(0x3f, idle_cpu_hsw), - ICPU(0x45, idle_cpu_hsw), - ICPU(0x46, idle_cpu_hsw), - ICPU(0x4d, idle_cpu_avn), - ICPU(0x3d, idle_cpu_bdw), - ICPU(0x47, idle_cpu_bdw), - ICPU(0x4f, idle_cpu_bdw), - ICPU(0x56, idle_cpu_bdw), - ICPU(0x4e, idle_cpu_skl), - ICPU(0x5e, idle_cpu_skl), - ICPU(0x8e, idle_cpu_skl), - ICPU(0x9e, idle_cpu_skl), - ICPU(0x55, idle_cpu_skx), - ICPU(0x57, idle_cpu_knl), - ICPU(0x5c, idle_cpu_bxt), + ICPU(INTEL_FAM6_NEHALEM_EP, idle_cpu_nehalem), + ICPU(INTEL_FAM6_NEHALEM, idle_cpu_nehalem), + ICPU(INTEL_FAM6_WESTMERE2, idle_cpu_nehalem), + ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem), + ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem), + ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem), + ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom), + ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft), + ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem), + ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb), + ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb), + ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom), + ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt), + ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht), + ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb), + ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt), + ICPU(INTEL_FAM6_HASWELL_CORE, idle_cpu_hsw), + ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw), + ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw), + ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw), + ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn), + ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw), + ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw), + ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw), + ICPU(INTEL_FAM6_BROADWELL_XEON_D, idle_cpu_bdw), + ICPU(INTEL_FAM6_SKYLAKE_MOBILE, idle_cpu_skl), + 
ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, idle_cpu_skl), + ICPU(INTEL_FAM6_KABYLAKE_MOBILE, idle_cpu_skl), + ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, idle_cpu_skl), + ICPU(INTEL_FAM6_SKYLAKE_X, idle_cpu_skx), + ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl), + ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt), + ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv), {} }; -MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids); /* * intel_idle_probe() @@ -1154,7 +1187,10 @@ static unsigned long long irtl_2_usec(unsigned long long irtl) { unsigned long long ns; - ns = irtl_ns_units[(irtl >> 10) & 0x3]; + if (!irtl) + return 0; + + ns = irtl_ns_units[(irtl >> 10) & 0x7]; return div64_u64((irtl & 0x3FF) * ns, 1000); } @@ -1167,43 +1203,39 @@ static unsigned long long irtl_2_usec(unsigned long long irtl) static void bxt_idle_state_table_update(void) { unsigned long long msr; + unsigned int usec; rdmsrl(MSR_PKGC6_IRTL, msr); - if (msr) { - unsigned int usec = irtl_2_usec(msr); - + usec = irtl_2_usec(msr); + if (usec) { bxt_cstates[2].exit_latency = usec; bxt_cstates[2].target_residency = usec; } rdmsrl(MSR_PKGC7_IRTL, msr); - if (msr) { - unsigned int usec = irtl_2_usec(msr); - + usec = irtl_2_usec(msr); + if (usec) { bxt_cstates[3].exit_latency = usec; bxt_cstates[3].target_residency = usec; } rdmsrl(MSR_PKGC8_IRTL, msr); - if (msr) { - unsigned int usec = irtl_2_usec(msr); - + usec = irtl_2_usec(msr); + if (usec) { bxt_cstates[4].exit_latency = usec; bxt_cstates[4].target_residency = usec; } rdmsrl(MSR_PKGC9_IRTL, msr); - if (msr) { - unsigned int usec = irtl_2_usec(msr); - + usec = irtl_2_usec(msr); + if (usec) { bxt_cstates[5].exit_latency = usec; bxt_cstates[5].target_residency = usec; } rdmsrl(MSR_PKGC10_IRTL, msr); - if (msr) { - unsigned int usec = irtl_2_usec(msr); - + usec = irtl_2_usec(msr); + if (usec) { bxt_cstates[6].exit_latency = usec; bxt_cstates[6].target_residency = usec; } @@ -1261,13 +1293,13 @@ static void intel_idle_state_table_update(void) { switch (boot_cpu_data.x86_model) { - case 0x3e: /* IVT */ + case INTEL_FAM6_IVYBRIDGE_X: ivt_idle_state_table_update(); break; - case 0x5c: /* BXT */ + case INTEL_FAM6_ATOM_GOLDMONT: bxt_idle_state_table_update(); break; - case 0x5e: /* SKL-H */ + case INTEL_FAM6_SKYLAKE_DESKTOP: sklh_idle_state_table_update(); break; } @@ -1415,34 +1447,12 @@ static int __init intel_idle_init(void) return 0; } +device_initcall(intel_idle_init); -static void __exit intel_idle_exit(void) -{ - struct cpuidle_device *dev; - int i; - - cpu_notifier_register_begin(); - - if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) - on_each_cpu(__setup_broadcast_timer, (void *)false, 1); - __unregister_cpu_notifier(&cpu_hotplug_notifier); - - for_each_possible_cpu(i) { - dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); - cpuidle_unregister_device(dev); - } - - cpu_notifier_register_done(); - - cpuidle_unregister_driver(&intel_idle_driver); - free_percpu(intel_idle_cpuidle_devices); -} - -module_init(intel_idle_init); -module_exit(intel_idle_exit); - +/* + * We are not really modular, but we used to support that. Meaning we also + * support "intel_idle.max_cstate=..." at boot and also a read-only export of + * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param + * is the easiest way (currently) to continue doing that. 
+ */
 module_param(max_cstate, int, 0444);
-
-MODULE_AUTHOR("Len Brown ");
-MODULE_DESCRIPTION("Cpuidle driver for Intel Hardware v" INTEL_IDLE_VERSION);
-MODULE_LICENSE("GPL");
diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig
index 505e921f0b19..6743b18194fb 100644
--- a/drivers/iio/Kconfig
+++ b/drivers/iio/Kconfig
@@ -46,6 +46,14 @@ config IIO_CONSUMERS_PER_TRIGGER
	  This value controls the maximum number of consumers that a
	  given trigger may handle. Default is 2.

+config IIO_SW_DEVICE
+	tristate "Enable software IIO device support"
+	select IIO_CONFIGFS
+	help
+	  Provides IIO core support for software devices. A software
+	  device can be created via configfs or directly by a driver
+	  using the API provided.
+
 config IIO_SW_TRIGGER
	tristate "Enable software triggers support"
	select IIO_CONFIGFS
diff --git a/drivers/iio/Makefile b/drivers/iio/Makefile
index 20f649073462..87e4c4369e2f 100644
--- a/drivers/iio/Makefile
+++ b/drivers/iio/Makefile
@@ -8,6 +8,7 @@ industrialio-$(CONFIG_IIO_BUFFER) += industrialio-buffer.o
 industrialio-$(CONFIG_IIO_TRIGGER) += industrialio-trigger.o
 obj-$(CONFIG_IIO_CONFIGFS) += industrialio-configfs.o
+obj-$(CONFIG_IIO_SW_DEVICE) += industrialio-sw-device.o
 obj-$(CONFIG_IIO_SW_TRIGGER) += industrialio-sw-trigger.o
 obj-$(CONFIG_IIO_TRIGGERED_EVENT) += industrialio-triggered-event.o
diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig
index e4a758cd7d35..89d78208de3f 100644
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -17,6 +17,16 @@ config BMA180
	  To compile this driver as a module, choose M here: the
	  module will be called bma180.

+config BMA220
+	tristate "Bosch BMA220 3-Axis Accelerometer Driver"
+	depends on SPI
+	help
+	  Say yes here to add support for the Bosch BMA220 triaxial
+	  acceleration sensor.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called bma220_spi.
+
 config BMC150_ACCEL
	tristate "Bosch BMC150 Accelerometer Driver"
	select IIO_BUFFER
@@ -136,13 +146,23 @@ config MMA7455_SPI
	  To compile this driver as a module, choose M here: the
	  module will be called mma7455_spi.

+config MMA7660
+	tristate "Freescale MMA7660FC 3-Axis Accelerometer Driver"
+	depends on I2C
+	help
+	  Say yes here to get support for the Freescale MMA7660FC 3-Axis
+	  accelerometer.
+
+	  Choosing M will build the driver as a module. If so, the module
+	  will be called mma7660.
+
 config MMA8452
-	tristate "Freescale MMA8452Q and similar Accelerometers Driver"
+	tristate "Freescale / NXP MMA8452Q and similar Accelerometers Driver"
	depends on I2C
	select IIO_BUFFER
	select IIO_TRIGGERED_BUFFER
	help
-	  Say yes here to build support for the following Freescale 3-axis
+	  Say yes here to build support for the following Freescale / NXP 3-axis
	  accelerometers: MMA8451Q, MMA8452Q, MMA8453Q, MMA8652FC,
	  MMA8653FC, FXLS8471Q.
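The intel_idle conversion above trades module_init()/module_exit() for
device_initcall() while deliberately keeping module_param(), so the
"intel_idle.max_cstate=..." boot argument and its read-only export under
/sys/module/intel_idle/parameters/ continue to work for a built-in driver.
A minimal sketch of that built-in-with-parameters pattern; the driver name
"foo" and its init function are illustrative, not part of the patch:

	#include <linux/init.h>
	#include <linux/moduleparam.h>
	#include <linux/printk.h>

	static int max_cstate = 9;

	/*
	 * 0444: readable at /sys/module/foo/parameters/max_cstate and
	 * settable at boot via "foo.max_cstate=...", but not writable
	 * at runtime.
	 */
	module_param(max_cstate, int, 0444);

	static int __init foo_init(void)
	{
		pr_info("foo: limiting C-states to C%d\n", max_cstate);
		return 0;
	}
	/* device_initcall(): runs once at boot; no exit path, built-in only */
	device_initcall(foo_init);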
diff --git a/drivers/iio/accel/Makefile b/drivers/iio/accel/Makefile index 71b6794de885..6cedbecca2ee 100644 --- a/drivers/iio/accel/Makefile +++ b/drivers/iio/accel/Makefile @@ -4,6 +4,7 @@ # When adding new entries keep the list in alphabetical order obj-$(CONFIG_BMA180) += bma180.o +obj-$(CONFIG_BMA220) += bma220_spi.o obj-$(CONFIG_BMC150_ACCEL) += bmc150-accel-core.o obj-$(CONFIG_BMC150_ACCEL_I2C) += bmc150-accel-i2c.o obj-$(CONFIG_BMC150_ACCEL_SPI) += bmc150-accel-spi.o @@ -15,6 +16,8 @@ obj-$(CONFIG_MMA7455) += mma7455_core.o obj-$(CONFIG_MMA7455_I2C) += mma7455_i2c.o obj-$(CONFIG_MMA7455_SPI) += mma7455_spi.o +obj-$(CONFIG_MMA7660) += mma7660.o + obj-$(CONFIG_MMA8452) += mma8452.o obj-$(CONFIG_MMA9551_CORE) += mma9551_core.o diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c index f04b88406995..e3f88ba5faf3 100644 --- a/drivers/iio/accel/bma180.c +++ b/drivers/iio/accel/bma180.c @@ -654,7 +654,7 @@ static irqreturn_t bma180_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct bma180_data *data = iio_priv(indio_dev); - int64_t time_ns = iio_get_time_ns(); + s64 time_ns = iio_get_time_ns(indio_dev); int bit, ret, i = 0; mutex_lock(&data->mutex); diff --git a/drivers/iio/accel/bma220_spi.c b/drivers/iio/accel/bma220_spi.c new file mode 100644 index 000000000000..1098d10df8e8 --- /dev/null +++ b/drivers/iio/accel/bma220_spi.c @@ -0,0 +1,338 @@ +/** + * BMA220 Digital triaxial acceleration sensor driver + * + * Copyright (c) 2016, Intel Corporation. + * + * This file is subject to the terms and conditions of version 2 of + * the GNU General Public License. See the file COPYING in the main + * directory of this archive for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BMA220_REG_ID 0x00 +#define BMA220_REG_ACCEL_X 0x02 +#define BMA220_REG_ACCEL_Y 0x03 +#define BMA220_REG_ACCEL_Z 0x04 +#define BMA220_REG_RANGE 0x11 +#define BMA220_REG_SUSPEND 0x18 + +#define BMA220_CHIP_ID 0xDD +#define BMA220_READ_MASK 0x80 +#define BMA220_RANGE_MASK 0x03 +#define BMA220_DATA_SHIFT 2 +#define BMA220_SUSPEND_SLEEP 0xFF +#define BMA220_SUSPEND_WAKE 0x00 + +#define BMA220_DEVICE_NAME "bma220" +#define BMA220_SCALE_AVAILABLE "0.623 1.248 2.491 4.983" + +#define BMA220_ACCEL_CHANNEL(index, reg, axis) { \ + .type = IIO_ACCEL, \ + .address = reg, \ + .modified = 1, \ + .channel2 = IIO_MOD_##axis, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ + .scan_index = index, \ + .scan_type = { \ + .sign = 's', \ + .realbits = 6, \ + .storagebits = 8, \ + .shift = BMA220_DATA_SHIFT, \ + .endianness = IIO_CPU, \ + }, \ +} + +enum bma220_axis { + AXIS_X, + AXIS_Y, + AXIS_Z, +}; + +static IIO_CONST_ATTR(in_accel_scale_available, BMA220_SCALE_AVAILABLE); + +static struct attribute *bma220_attributes[] = { + &iio_const_attr_in_accel_scale_available.dev_attr.attr, + NULL, +}; + +static const struct attribute_group bma220_attribute_group = { + .attrs = bma220_attributes, +}; + +static const int bma220_scale_table[][4] = { + {0, 623000}, {1, 248000}, {2, 491000}, {4, 983000} +}; + +struct bma220_data { + struct spi_device *spi_device; + struct mutex lock; + s8 buffer[16]; /* 3x8-bit channels + 5x8 padding + 8x8 timestamp */ + u8 tx_buf[2] ____cacheline_aligned; +}; + +static const struct iio_chan_spec bma220_channels[] = { + BMA220_ACCEL_CHANNEL(0, BMA220_REG_ACCEL_X, X), + BMA220_ACCEL_CHANNEL(1, BMA220_REG_ACCEL_Y, 
Y), + BMA220_ACCEL_CHANNEL(2, BMA220_REG_ACCEL_Z, Z), + IIO_CHAN_SOFT_TIMESTAMP(3), +}; + +static inline int bma220_read_reg(struct spi_device *spi, u8 reg) +{ + return spi_w8r8(spi, reg | BMA220_READ_MASK); +} + +static const unsigned long bma220_accel_scan_masks[] = { + BIT(AXIS_X) | BIT(AXIS_Y) | BIT(AXIS_Z), + 0 +}; + +static irqreturn_t bma220_trigger_handler(int irq, void *p) +{ + int ret; + struct iio_poll_func *pf = p; + struct iio_dev *indio_dev = pf->indio_dev; + struct bma220_data *data = iio_priv(indio_dev); + struct spi_device *spi = data->spi_device; + + mutex_lock(&data->lock); + data->tx_buf[0] = BMA220_REG_ACCEL_X | BMA220_READ_MASK; + ret = spi_write_then_read(spi, data->tx_buf, 1, data->buffer, + ARRAY_SIZE(bma220_channels) - 1); + if (ret < 0) + goto err; + + iio_push_to_buffers_with_timestamp(indio_dev, data->buffer, + pf->timestamp); +err: + mutex_unlock(&data->lock); + iio_trigger_notify_done(indio_dev->trig); + + return IRQ_HANDLED; +} + +static int bma220_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + int ret; + u8 range_idx; + struct bma220_data *data = iio_priv(indio_dev); + + switch (mask) { + case IIO_CHAN_INFO_RAW: + ret = bma220_read_reg(data->spi_device, chan->address); + if (ret < 0) + return -EINVAL; + *val = sign_extend32(ret >> BMA220_DATA_SHIFT, 5); + return IIO_VAL_INT; + case IIO_CHAN_INFO_SCALE: + ret = bma220_read_reg(data->spi_device, BMA220_REG_RANGE); + if (ret < 0) + return ret; + range_idx = ret & BMA220_RANGE_MASK; + *val = bma220_scale_table[range_idx][0]; + *val2 = bma220_scale_table[range_idx][1]; + return IIO_VAL_INT_PLUS_MICRO; + } + + return -EINVAL; +} + +static int bma220_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + int i; + int ret; + int index = -1; + struct bma220_data *data = iio_priv(indio_dev); + + switch (mask) { + case IIO_CHAN_INFO_SCALE: + for (i = 0; i < ARRAY_SIZE(bma220_scale_table); i++) + if (val == bma220_scale_table[i][0] && + val2 == bma220_scale_table[i][1]) { + index = i; + break; + } + if (index < 0) + return -EINVAL; + + mutex_lock(&data->lock); + data->tx_buf[0] = BMA220_REG_RANGE; + data->tx_buf[1] = index; + ret = spi_write(data->spi_device, data->tx_buf, + sizeof(data->tx_buf)); + if (ret < 0) + dev_err(&data->spi_device->dev, + "failed to set measurement range\n"); + mutex_unlock(&data->lock); + + return 0; + } + + return -EINVAL; +} + +static const struct iio_info bma220_info = { + .driver_module = THIS_MODULE, + .read_raw = bma220_read_raw, + .write_raw = bma220_write_raw, + .attrs = &bma220_attribute_group, +}; + +static int bma220_init(struct spi_device *spi) +{ + int ret; + + ret = bma220_read_reg(spi, BMA220_REG_ID); + if (ret != BMA220_CHIP_ID) + return -ENODEV; + + /* Make sure the chip is powered on */ + ret = bma220_read_reg(spi, BMA220_REG_SUSPEND); + if (ret < 0) + return ret; + else if (ret == BMA220_SUSPEND_WAKE) + return bma220_read_reg(spi, BMA220_REG_SUSPEND); + + return 0; +} + +static int bma220_deinit(struct spi_device *spi) +{ + int ret; + + /* Make sure the chip is powered off */ + ret = bma220_read_reg(spi, BMA220_REG_SUSPEND); + if (ret < 0) + return ret; + else if (ret == BMA220_SUSPEND_SLEEP) + return bma220_read_reg(spi, BMA220_REG_SUSPEND); + + return 0; +} + +static int bma220_probe(struct spi_device *spi) +{ + int ret; + struct iio_dev *indio_dev; + struct bma220_data *data; + + indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*data)); + if 
(!indio_dev) { + dev_err(&spi->dev, "iio allocation failed!\n"); + return -ENOMEM; + } + + data = iio_priv(indio_dev); + data->spi_device = spi; + spi_set_drvdata(spi, indio_dev); + mutex_init(&data->lock); + + indio_dev->dev.parent = &spi->dev; + indio_dev->info = &bma220_info; + indio_dev->name = BMA220_DEVICE_NAME; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->channels = bma220_channels; + indio_dev->num_channels = ARRAY_SIZE(bma220_channels); + indio_dev->available_scan_masks = bma220_accel_scan_masks; + + ret = bma220_init(data->spi_device); + if (ret < 0) + return ret; + + ret = iio_triggered_buffer_setup(indio_dev, NULL, + bma220_trigger_handler, NULL); + if (ret < 0) { + dev_err(&spi->dev, "iio triggered buffer setup failed\n"); + goto err_suspend; + } + + ret = iio_device_register(indio_dev); + if (ret < 0) { + dev_err(&spi->dev, "iio_device_register failed\n"); + iio_triggered_buffer_cleanup(indio_dev); + goto err_suspend; + } + + return 0; + +err_suspend: + return bma220_deinit(spi); +} + +static int bma220_remove(struct spi_device *spi) +{ + struct iio_dev *indio_dev = spi_get_drvdata(spi); + + iio_device_unregister(indio_dev); + iio_triggered_buffer_cleanup(indio_dev); + + return bma220_deinit(spi); +} + +#ifdef CONFIG_PM_SLEEP +static int bma220_suspend(struct device *dev) +{ + struct bma220_data *data = + iio_priv(spi_get_drvdata(to_spi_device(dev))); + + /* The chip can be suspended/woken up by a simple register read. */ + return bma220_read_reg(data->spi_device, BMA220_REG_SUSPEND); +} + +static int bma220_resume(struct device *dev) +{ + struct bma220_data *data = + iio_priv(spi_get_drvdata(to_spi_device(dev))); + + return bma220_read_reg(data->spi_device, BMA220_REG_SUSPEND); +} + +static SIMPLE_DEV_PM_OPS(bma220_pm_ops, bma220_suspend, bma220_resume); + +#define BMA220_PM_OPS (&bma220_pm_ops) +#else +#define BMA220_PM_OPS NULL +#endif + +static const struct spi_device_id bma220_spi_id[] = { + {"bma220", 0}, + {} +}; + +static const struct acpi_device_id bma220_acpi_id[] = { + {"BMA0220", 0}, + {} +}; + +MODULE_DEVICE_TABLE(spi, bma220_spi_id); + +static struct spi_driver bma220_driver = { + .driver = { + .name = "bma220_spi", + .pm = BMA220_PM_OPS, + .acpi_match_table = ACPI_PTR(bma220_acpi_id), + }, + .probe = bma220_probe, + .remove = bma220_remove, + .id_table = bma220_spi_id, +}; + +module_spi_driver(bma220_driver); + +MODULE_AUTHOR("Tiberiu Breana "); +MODULE_DESCRIPTION("BMA220 acceleration sensor driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index 197e693e7e7b..bf17aae66145 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -901,7 +901,7 @@ static int __bmc150_accel_fifo_flush(struct iio_dev *indio_dev, */ if (!irq) { data->old_timestamp = data->timestamp; - data->timestamp = iio_get_time_ns(); + data->timestamp = iio_get_time_ns(indio_dev); } /* @@ -1303,7 +1303,7 @@ static irqreturn_t bmc150_accel_irq_handler(int irq, void *private) int i; data->old_timestamp = data->timestamp; - data->timestamp = iio_get_time_ns(); + data->timestamp = iio_get_time_ns(indio_dev); for (i = 0; i < BMC150_ACCEL_TRIGGERS; i++) { if (data->triggers[i].enabled) { diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index bfe219a8bea2..765a72362dc6 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1129,7 +1129,7 @@ static irqreturn_t kxcjk1013_data_rdy_trig_poll(int irq, void *private) struct 
iio_dev *indio_dev = private; struct kxcjk1013_data *data = iio_priv(indio_dev); - data->timestamp = iio_get_time_ns(); + data->timestamp = iio_get_time_ns(indio_dev); if (data->dready_trigger_on) iio_trigger_poll(data->dready_trig); diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c index 923f56598d4b..3a9f106787d2 100644 --- a/drivers/iio/accel/kxsd9.c +++ b/drivers/iio/accel/kxsd9.c @@ -81,7 +81,7 @@ static int kxsd9_write_scale(struct iio_dev *indio_dev, int micro) mutex_lock(&st->buf_lock); ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C)); - if (ret) + if (ret < 0) goto error_ret; st->tx[0] = KXSD9_WRITE(KXSD9_REG_CTRL_C); st->tx[1] = (ret & ~KXSD9_FS_MASK) | i; @@ -163,7 +163,7 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev, break; case IIO_CHAN_INFO_SCALE: ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C)); - if (ret) + if (ret < 0) goto error_ret; *val2 = kxsd9_micro_scales[ret & KXSD9_FS_MASK]; ret = IIO_VAL_INT_PLUS_MICRO; diff --git a/drivers/iio/accel/mma7455_core.c b/drivers/iio/accel/mma7455_core.c index c902f54c23f5..6551085bedd7 100644 --- a/drivers/iio/accel/mma7455_core.c +++ b/drivers/iio/accel/mma7455_core.c @@ -97,7 +97,8 @@ static irqreturn_t mma7455_trigger_handler(int irq, void *p) if (ret) goto done; - iio_push_to_buffers_with_timestamp(indio_dev, buf, iio_get_time_ns()); + iio_push_to_buffers_with_timestamp(indio_dev, buf, + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); diff --git a/drivers/iio/accel/mma7660.c b/drivers/iio/accel/mma7660.c new file mode 100644 index 000000000000..0acdee516973 --- /dev/null +++ b/drivers/iio/accel/mma7660.c @@ -0,0 +1,277 @@ +/** + * Freescale MMA7660FC 3-Axis Accelerometer + * + * Copyright (c) 2016, Intel Corporation. + * + * This file is subject to the terms and conditions of version 2 of + * the GNU General Public License. See the file COPYING in the main + * directory of this archive for more details. + * + * IIO driver for Freescale MMA7660FC; 7-bit I2C address: 0x4c. 
+ */ + +#include +#include +#include +#include +#include + +#define MMA7660_DRIVER_NAME "mma7660" + +#define MMA7660_REG_XOUT 0x00 +#define MMA7660_REG_YOUT 0x01 +#define MMA7660_REG_ZOUT 0x02 +#define MMA7660_REG_OUT_BIT_ALERT BIT(6) + +#define MMA7660_REG_MODE 0x07 +#define MMA7660_REG_MODE_BIT_MODE BIT(0) +#define MMA7660_REG_MODE_BIT_TON BIT(2) + +#define MMA7660_I2C_READ_RETRIES 5 + +/* + * The accelerometer has one measurement range: + * + * -1.5g - +1.5g (6-bit, signed) + * + * scale = (1.5 + 1.5) * 9.81 / (2^6 - 1) = 0.467142857 + */ + +#define MMA7660_SCALE_AVAIL "0.467142857" + +const int mma7660_nscale = 467142857; + +#define MMA7660_CHANNEL(reg, axis) { \ + .type = IIO_ACCEL, \ + .address = reg, \ + .modified = 1, \ + .channel2 = IIO_MOD_##axis, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ +} + +static const struct iio_chan_spec mma7660_channels[] = { + MMA7660_CHANNEL(MMA7660_REG_XOUT, X), + MMA7660_CHANNEL(MMA7660_REG_YOUT, Y), + MMA7660_CHANNEL(MMA7660_REG_ZOUT, Z), +}; + +enum mma7660_mode { + MMA7660_MODE_STANDBY, + MMA7660_MODE_ACTIVE +}; + +struct mma7660_data { + struct i2c_client *client; + struct mutex lock; + enum mma7660_mode mode; +}; + +static IIO_CONST_ATTR(in_accel_scale_available, MMA7660_SCALE_AVAIL); + +static struct attribute *mma7660_attributes[] = { + &iio_const_attr_in_accel_scale_available.dev_attr.attr, + NULL, +}; + +static const struct attribute_group mma7660_attribute_group = { + .attrs = mma7660_attributes +}; + +static int mma7660_set_mode(struct mma7660_data *data, + enum mma7660_mode mode) +{ + int ret; + struct i2c_client *client = data->client; + + if (mode == data->mode) + return 0; + + ret = i2c_smbus_read_byte_data(client, MMA7660_REG_MODE); + if (ret < 0) { + dev_err(&client->dev, "failed to read sensor mode\n"); + return ret; + } + + if (mode == MMA7660_MODE_ACTIVE) { + ret &= ~MMA7660_REG_MODE_BIT_TON; + ret |= MMA7660_REG_MODE_BIT_MODE; + } else { + ret &= ~MMA7660_REG_MODE_BIT_TON; + ret &= ~MMA7660_REG_MODE_BIT_MODE; + } + + ret = i2c_smbus_write_byte_data(client, MMA7660_REG_MODE, ret); + if (ret < 0) { + dev_err(&client->dev, "failed to change sensor mode\n"); + return ret; + } + + data->mode = mode; + + return ret; +} + +static int mma7660_read_accel(struct mma7660_data *data, u8 address) +{ + int ret, retries = MMA7660_I2C_READ_RETRIES; + struct i2c_client *client = data->client; + + /* + * Read data. If the Alert bit is set, the register was read at + * the same time as the device was attempting to update the content. + * The solution is to read the register again. Do this only + * MMA7660_I2C_READ_RETRIES times to avoid spending too much time + * in the kernel. 
+ */ + do { + ret = i2c_smbus_read_byte_data(client, address); + if (ret < 0) { + dev_err(&client->dev, "register read failed\n"); + return ret; + } + } while (retries-- > 0 && ret & MMA7660_REG_OUT_BIT_ALERT); + + if (ret & MMA7660_REG_OUT_BIT_ALERT) { + dev_err(&client->dev, "all register read retries failed\n"); + return -ETIMEDOUT; + } + + return ret; +} + +static int mma7660_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + struct mma7660_data *data = iio_priv(indio_dev); + int ret; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + mutex_lock(&data->lock); + ret = mma7660_read_accel(data, chan->address); + mutex_unlock(&data->lock); + if (ret < 0) + return ret; + *val = sign_extend32(ret, 5); + return IIO_VAL_INT; + case IIO_CHAN_INFO_SCALE: + *val = 0; + *val2 = mma7660_nscale; + return IIO_VAL_INT_PLUS_NANO; + default: + return -EINVAL; + } + + return -EINVAL; +} + +static const struct iio_info mma7660_info = { + .driver_module = THIS_MODULE, + .read_raw = mma7660_read_raw, + .attrs = &mma7660_attribute_group, +}; + +static int mma7660_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret; + struct iio_dev *indio_dev; + struct mma7660_data *data; + + indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data)); + if (!indio_dev) { + dev_err(&client->dev, "iio allocation failed!\n"); + return -ENOMEM; + } + + data = iio_priv(indio_dev); + data->client = client; + i2c_set_clientdata(client, indio_dev); + mutex_init(&data->lock); + data->mode = MMA7660_MODE_STANDBY; + + indio_dev->dev.parent = &client->dev; + indio_dev->info = &mma7660_info; + indio_dev->name = MMA7660_DRIVER_NAME; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->channels = mma7660_channels; + indio_dev->num_channels = ARRAY_SIZE(mma7660_channels); + + ret = mma7660_set_mode(data, MMA7660_MODE_ACTIVE); + if (ret < 0) + return ret; + + ret = iio_device_register(indio_dev); + if (ret < 0) { + dev_err(&client->dev, "device_register failed\n"); + mma7660_set_mode(data, MMA7660_MODE_STANDBY); + } + + return ret; +} + +static int mma7660_remove(struct i2c_client *client) +{ + struct iio_dev *indio_dev = i2c_get_clientdata(client); + + iio_device_unregister(indio_dev); + + return mma7660_set_mode(iio_priv(indio_dev), MMA7660_MODE_STANDBY); +} + +#ifdef CONFIG_PM_SLEEP +static int mma7660_suspend(struct device *dev) +{ + struct mma7660_data *data; + + data = iio_priv(i2c_get_clientdata(to_i2c_client(dev))); + + return mma7660_set_mode(data, MMA7660_MODE_STANDBY); +} + +static int mma7660_resume(struct device *dev) +{ + struct mma7660_data *data; + + data = iio_priv(i2c_get_clientdata(to_i2c_client(dev))); + + return mma7660_set_mode(data, MMA7660_MODE_ACTIVE); +} + +static SIMPLE_DEV_PM_OPS(mma7660_pm_ops, mma7660_suspend, mma7660_resume); + +#define MMA7660_PM_OPS (&mma7660_pm_ops) +#else +#define MMA7660_PM_OPS NULL +#endif + +static const struct i2c_device_id mma7660_i2c_id[] = { + {"mma7660", 0}, + {} +}; + +static const struct acpi_device_id mma7660_acpi_id[] = { + {"MMA7660", 0}, + {} +}; + +MODULE_DEVICE_TABLE(acpi, mma7660_acpi_id); + +static struct i2c_driver mma7660_driver = { + .driver = { + .name = "mma7660", + .pm = MMA7660_PM_OPS, + .acpi_match_table = ACPI_PTR(mma7660_acpi_id), + }, + .probe = mma7660_probe, + .remove = mma7660_remove, + .id_table = mma7660_i2c_id, +}; + +module_i2c_driver(mma7660_driver); + +MODULE_AUTHOR("Constantin Musca "); +MODULE_DESCRIPTION("Freescale MMA7660FC 3-Axis Accelerometer driver"); 
+MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index e225d3c53bd5..d41e1b588e68 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -1,22 +1,22 @@ /* - * mma8452.c - Support for following Freescale 3-axis accelerometers: + * mma8452.c - Support for following Freescale / NXP 3-axis accelerometers: * - * MMA8451Q (14 bit) - * MMA8452Q (12 bit) - * MMA8453Q (10 bit) - * MMA8652FC (12 bit) - * MMA8653FC (10 bit) - * FXLS8471Q (14 bit) + * device name digital output 7-bit I2C slave address (pin selectable) + * --------------------------------------------------------------------- + * MMA8451Q 14 bit 0x1c / 0x1d + * MMA8452Q 12 bit 0x1c / 0x1d + * MMA8453Q 10 bit 0x1c / 0x1d + * MMA8652FC 12 bit 0x1d + * MMA8653FC 10 bit 0x1d + * FXLS8471Q 14 bit 0x1e / 0x1d / 0x1c / 0x1f * - * Copyright 2015 Martin Kepplinger + * Copyright 2015 Martin Kepplinger * Copyright 2014 Peter Meerwald * * This file is subject to the terms and conditions of version 2 of * the GNU General Public License. See the file COPYING in the main * directory of this archive for more details. * - * 7-bit I2C slave address 0x1c/0x1d (pin selectable) - * * TODO: orientation events */ @@ -76,6 +76,8 @@ #define MMA8452_CTRL_DR_DEFAULT 0x4 /* 50 Hz sample frequency */ #define MMA8452_CTRL_REG2 0x2b #define MMA8452_CTRL_REG2_RST BIT(6) +#define MMA8452_CTRL_REG2_MODS_SHIFT 3 +#define MMA8452_CTRL_REG2_MODS_MASK 0x1b #define MMA8452_CTRL_REG4 0x2d #define MMA8452_CTRL_REG5 0x2e #define MMA8452_OFF_X 0x2f @@ -106,7 +108,7 @@ struct mma8452_data { }; /** - * struct mma_chip_info - chip specific data for Freescale's accelerometers + * struct mma_chip_info - chip specific data * @chip_id: WHO_AM_I register's value * @channels: struct iio_chan_spec matching the device's * capabilities @@ -257,20 +259,17 @@ static const int mma8452_samp_freq[8][2] = { {6, 250000}, {1, 560000} }; -/* Datasheet table 35 (step time vs sample frequency) */ -static const int mma8452_transient_time_step_us[8] = { - 1250, - 2500, - 5000, - 10000, - 20000, - 20000, - 20000, - 20000 +/* Datasheet table: step time "Relationship with the ODR" (sample frequency) */ +static const int mma8452_transient_time_step_us[4][8] = { + { 1250, 2500, 5000, 10000, 20000, 20000, 20000, 20000 }, /* normal */ + { 1250, 2500, 5000, 10000, 20000, 80000, 80000, 80000 }, /* l p l n */ + { 1250, 2500, 2500, 2500, 2500, 2500, 2500, 2500 }, /* high res*/ + { 1250, 2500, 5000, 10000, 20000, 80000, 160000, 160000 } /* l p */ }; -/* Datasheet table 18 (normal mode) */ -static const int mma8452_hp_filter_cutoff[8][4][2] = { +/* Datasheet table "High-Pass Filter Cutoff Options" */ +static const int mma8452_hp_filter_cutoff[4][8][4][2] = { + { /* normal */ { {16, 0}, {8, 0}, {4, 0}, {2, 0} }, /* 800 Hz sample */ { {16, 0}, {8, 0}, {4, 0}, {2, 0} }, /* 400 Hz sample */ { {8, 0}, {4, 0}, {2, 0}, {1, 0} }, /* 200 Hz sample */ @@ -279,8 +278,61 @@ static const int mma8452_hp_filter_cutoff[8][4][2] = { { {2, 0}, {1, 0}, {0, 500000}, {0, 250000} }, /* 12.5 Hz sample */ { {2, 0}, {1, 0}, {0, 500000}, {0, 250000} }, /* 6.25 Hz sample */ { {2, 0}, {1, 0}, {0, 500000}, {0, 250000} } /* 1.56 Hz sample */ + }, + { /* low noise low power */ + { {16, 0}, {8, 0}, {4, 0}, {2, 0} }, + { {16, 0}, {8, 0}, {4, 0}, {2, 0} }, + { {8, 0}, {4, 0}, {2, 0}, {1, 0} }, + { {4, 0}, {2, 0}, {1, 0}, {0, 500000} }, + { {2, 0}, {1, 0}, {0, 500000}, {0, 250000} }, + { {0, 500000}, {0, 250000}, {0, 125000}, {0, 063000} }, + { {0, 500000}, {0, 250000}, {0, 
125000}, {0, 063000} }, + { {0, 500000}, {0, 250000}, {0, 125000}, {0, 063000} } + }, + { /* high resolution */ + { {16, 0}, {8, 0}, {4, 0}, {2, 0} }, + { {16, 0}, {8, 0}, {4, 0}, {2, 0} }, + { {16, 0}, {8, 0}, {4, 0}, {2, 0} }, + { {16, 0}, {8, 0}, {4, 0}, {2, 0} }, + { {16, 0}, {8, 0}, {4, 0}, {2, 0} }, + { {16, 0}, {8, 0}, {4, 0}, {2, 0} }, + { {16, 0}, {8, 0}, {4, 0}, {2, 0} }, + { {16, 0}, {8, 0}, {4, 0}, {2, 0} } + }, + { /* low power */ + { {16, 0}, {8, 0}, {4, 0}, {2, 0} }, + { {8, 0}, {4, 0}, {2, 0}, {1, 0} }, + { {4, 0}, {2, 0}, {1, 0}, {0, 500000} }, + { {2, 0}, {1, 0}, {0, 500000}, {0, 250000} }, + { {1, 0}, {0, 500000}, {0, 250000}, {0, 125000} }, + { {0, 250000}, {0, 125000}, {0, 063000}, {0, 031000} }, + { {0, 250000}, {0, 125000}, {0, 063000}, {0, 031000} }, + { {0, 250000}, {0, 125000}, {0, 063000}, {0, 031000} } + } }; +/* Datasheet table "MODS Oversampling modes averaging values at each ODR" */ +static const u16 mma8452_os_ratio[4][8] = { + /* 800 Hz, 400 Hz, ... , 1.56 Hz */ + { 2, 4, 4, 4, 4, 16, 32, 128 }, /* normal */ + { 2, 4, 4, 4, 4, 4, 8, 32 }, /* low power low noise */ + { 2, 4, 8, 16, 32, 128, 256, 1024 }, /* high resolution */ + { 2, 2, 2, 2, 2, 2, 4, 16 } /* low power */ +}; + +static int mma8452_get_power_mode(struct mma8452_data *data) +{ + int reg; + + reg = i2c_smbus_read_byte_data(data->client, + MMA8452_CTRL_REG2); + if (reg < 0) + return reg; + + return ((reg & MMA8452_CTRL_REG2_MODS_MASK) >> + MMA8452_CTRL_REG2_MODS_SHIFT); +} + static ssize_t mma8452_show_samp_freq_avail(struct device *dev, struct device_attribute *attr, char *buf) @@ -303,13 +355,42 @@ static ssize_t mma8452_show_scale_avail(struct device *dev, static ssize_t mma8452_show_hp_cutoff_avail(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct mma8452_data *data = iio_priv(indio_dev); + int i, j; + + i = mma8452_get_odr_index(data); + j = mma8452_get_power_mode(data); + if (j < 0) + return j; + + return mma8452_show_int_plus_micros(buf, mma8452_hp_filter_cutoff[j][i], + ARRAY_SIZE(mma8452_hp_filter_cutoff[0][0])); +} + +static ssize_t mma8452_show_os_ratio_avail(struct device *dev, + struct device_attribute *attr, + char *buf) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct mma8452_data *data = iio_priv(indio_dev); int i = mma8452_get_odr_index(data); + int j; + u16 val = 0; + size_t len = 0; - return mma8452_show_int_plus_micros(buf, mma8452_hp_filter_cutoff[i], - ARRAY_SIZE(mma8452_hp_filter_cutoff[0])); + for (j = 0; j < ARRAY_SIZE(mma8452_os_ratio); j++) { + if (val == mma8452_os_ratio[j][i]) + continue; + + val = mma8452_os_ratio[j][i]; + + len += scnprintf(buf + len, PAGE_SIZE - len, "%d ", val); + } + buf[len - 1] = '\n'; + + return len; } static IIO_DEV_ATTR_SAMP_FREQ_AVAIL(mma8452_show_samp_freq_avail); @@ -317,6 +398,8 @@ static IIO_DEVICE_ATTR(in_accel_scale_available, S_IRUGO, mma8452_show_scale_avail, NULL, 0); static IIO_DEVICE_ATTR(in_accel_filter_high_pass_3db_frequency_available, S_IRUGO, mma8452_show_hp_cutoff_avail, NULL, 0); +static IIO_DEVICE_ATTR(in_accel_oversampling_ratio_available, S_IRUGO, + mma8452_show_os_ratio_avail, NULL, 0); static int mma8452_get_samp_freq_index(struct mma8452_data *data, int val, int val2) @@ -335,24 +418,33 @@ static int mma8452_get_scale_index(struct mma8452_data *data, int val, int val2) static int mma8452_get_hp_filter_index(struct mma8452_data *data, int val, int val2) { - int i = mma8452_get_odr_index(data); + int i, j; + + i = 
mma8452_get_odr_index(data); + j = mma8452_get_power_mode(data); + if (j < 0) + return j; - return mma8452_get_int_plus_micros_index(mma8452_hp_filter_cutoff[i], - ARRAY_SIZE(mma8452_hp_filter_cutoff[0]), val, val2); + return mma8452_get_int_plus_micros_index(mma8452_hp_filter_cutoff[j][i], + ARRAY_SIZE(mma8452_hp_filter_cutoff[0][0]), val, val2); } static int mma8452_read_hp_filter(struct mma8452_data *data, int *hz, int *uHz) { - int i, ret; + int j, i, ret; ret = i2c_smbus_read_byte_data(data->client, MMA8452_HP_FILTER_CUTOFF); if (ret < 0) return ret; i = mma8452_get_odr_index(data); + j = mma8452_get_power_mode(data); + if (j < 0) + return j; + ret &= MMA8452_HP_FILTER_CUTOFF_SEL_MASK; - *hz = mma8452_hp_filter_cutoff[i][ret][0]; - *uHz = mma8452_hp_filter_cutoff[i][ret][1]; + *hz = mma8452_hp_filter_cutoff[j][i][ret][0]; + *uHz = mma8452_hp_filter_cutoff[j][i][ret][1]; return 0; } @@ -414,6 +506,15 @@ static int mma8452_read_raw(struct iio_dev *indio_dev, } return IIO_VAL_INT_PLUS_MICRO; + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + ret = mma8452_get_power_mode(data); + if (ret < 0) + return ret; + + i = mma8452_get_odr_index(data); + + *val = mma8452_os_ratio[ret][i]; + return IIO_VAL_INT; } return -EINVAL; @@ -480,6 +581,21 @@ fail: return ret; } +static int mma8452_set_power_mode(struct mma8452_data *data, u8 mode) +{ + int reg; + + reg = i2c_smbus_read_byte_data(data->client, + MMA8452_CTRL_REG2); + if (reg < 0) + return reg; + + reg &= ~MMA8452_CTRL_REG2_MODS_MASK; + reg |= mode << MMA8452_CTRL_REG2_MODS_SHIFT; + + return mma8452_change_config(data, MMA8452_CTRL_REG2, reg); +} + /* returns >0 if in freefall mode, 0 if not or <0 if an error occurred */ static int mma8452_freefall_mode_enabled(struct mma8452_data *data) { @@ -518,11 +634,7 @@ static int mma8452_set_freefall_mode(struct mma8452_data *data, bool state) val |= MMA8452_FF_MT_CFG_OAE; } - val = mma8452_change_config(data, chip->ev_cfg, val); - if (val) - return val; - - return 0; + return mma8452_change_config(data, chip->ev_cfg, val); } static int mma8452_set_hp_filter_frequency(struct mma8452_data *data, @@ -597,6 +709,14 @@ static int mma8452_write_raw(struct iio_dev *indio_dev, return mma8452_change_config(data, MMA8452_DATA_CFG, data->data_cfg); + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + ret = mma8452_get_odr_index(data); + + for (i = 0; i < ARRAY_SIZE(mma8452_os_ratio); i++) { + if (mma8452_os_ratio[i][ret] == val) + return mma8452_set_power_mode(data, i); + } + default: return -EINVAL; } @@ -610,7 +730,7 @@ static int mma8452_read_thresh(struct iio_dev *indio_dev, int *val, int *val2) { struct mma8452_data *data = iio_priv(indio_dev); - int ret, us; + int ret, us, power_mode; switch (info) { case IIO_EV_INFO_VALUE: @@ -629,7 +749,11 @@ static int mma8452_read_thresh(struct iio_dev *indio_dev, if (ret < 0) return ret; - us = ret * mma8452_transient_time_step_us[ + power_mode = mma8452_get_power_mode(data); + if (power_mode < 0) + return power_mode; + + us = ret * mma8452_transient_time_step_us[power_mode][ mma8452_get_odr_index(data)]; *val = us / USEC_PER_SEC; *val2 = us % USEC_PER_SEC; @@ -677,8 +801,12 @@ static int mma8452_write_thresh(struct iio_dev *indio_dev, val); case IIO_EV_INFO_PERIOD: + ret = mma8452_get_power_mode(data); + if (ret < 0) + return ret; + steps = (val * USEC_PER_SEC + val2) / - mma8452_transient_time_step_us[ + mma8452_transient_time_step_us[ret][ mma8452_get_odr_index(data)]; if (steps < 0 || steps > 0xff) @@ -785,7 +913,7 @@ static int mma8452_write_event_config(struct iio_dev *indio_dev, 
static void mma8452_transient_interrupt(struct iio_dev *indio_dev) { struct mma8452_data *data = iio_priv(indio_dev); - s64 ts = iio_get_time_ns(); + s64 ts = iio_get_time_ns(indio_dev); int src; src = i2c_smbus_read_byte_data(data->client, data->chip_info->ev_src); @@ -865,7 +993,7 @@ static irqreturn_t mma8452_trigger_handler(int irq, void *p) goto done; iio_push_to_buffers_with_timestamp(indio_dev, buffer, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); @@ -978,7 +1106,8 @@ static struct attribute_group mma8452_event_attribute_group = { BIT(IIO_CHAN_INFO_CALIBBIAS), \ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SAMP_FREQ) | \ BIT(IIO_CHAN_INFO_SCALE) | \ - BIT(IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY), \ + BIT(IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY) | \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ .scan_index = idx, \ .scan_type = { \ .sign = 's', \ @@ -998,7 +1127,8 @@ static struct attribute_group mma8452_event_attribute_group = { .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ BIT(IIO_CHAN_INFO_CALIBBIAS), \ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SAMP_FREQ) | \ - BIT(IIO_CHAN_INFO_SCALE), \ + BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ .scan_index = idx, \ .scan_type = { \ .sign = 's', \ @@ -1171,6 +1301,7 @@ static struct attribute *mma8452_attributes[] = { &iio_dev_attr_sampling_frequency_available.dev_attr.attr, &iio_dev_attr_in_accel_scale_available.dev_attr.attr, &iio_dev_attr_in_accel_filter_high_pass_3db_frequency_available.dev_attr.attr, + &iio_dev_attr_in_accel_oversampling_ratio_available.dev_attr.attr, NULL }; @@ -1444,8 +1575,8 @@ static int mma8452_probe(struct i2c_client *client, goto buffer_cleanup; ret = mma8452_set_freefall_mode(data, false); - if (ret) - return ret; + if (ret < 0) + goto buffer_cleanup; return 0; @@ -1558,5 +1689,5 @@ static struct i2c_driver mma8452_driver = { module_i2c_driver(mma8452_driver); MODULE_AUTHOR("Peter Meerwald "); -MODULE_DESCRIPTION("Freescale MMA8452 accelerometer driver"); +MODULE_DESCRIPTION("Freescale / NXP MMA8452 accelerometer driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/iio/accel/mma9551.c b/drivers/iio/accel/mma9551.c index d899a4d4307f..bf2704435629 100644 --- a/drivers/iio/accel/mma9551.c +++ b/drivers/iio/accel/mma9551.c @@ -391,7 +391,7 @@ static irqreturn_t mma9551_event_handler(int irq, void *private) iio_push_event(indio_dev, IIO_MOD_EVENT_CODE(IIO_INCLI, 0, (mma_axis + 1), IIO_EV_TYPE_ROC, IIO_EV_DIR_RISING), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); out: mutex_unlock(&data->mutex); diff --git a/drivers/iio/accel/mma9553.c b/drivers/iio/accel/mma9553.c index bb05f3efddca..36bf19733be0 100644 --- a/drivers/iio/accel/mma9553.c +++ b/drivers/iio/accel/mma9553.c @@ -1001,7 +1001,7 @@ static irqreturn_t mma9553_irq_handler(int irq, void *private) struct iio_dev *indio_dev = private; struct mma9553_data *data = iio_priv(indio_dev); - data->timestamp = iio_get_time_ns(); + data->timestamp = iio_get_time_ns(indio_dev); /* * Since we only configure the interrupt pin when an * event is enabled, we are sure we have at least diff --git a/drivers/iio/accel/st_accel.h b/drivers/iio/accel/st_accel.h index 57f83a67948c..f8dfdb690563 100644 --- a/drivers/iio/accel/st_accel.h +++ b/drivers/iio/accel/st_accel.h @@ -29,6 +29,7 @@ #define LSM330_ACCEL_DEV_NAME "lsm330_accel" #define LSM303AGR_ACCEL_DEV_NAME "lsm303agr_accel" #define LIS2DH12_ACCEL_DEV_NAME "lis2dh12_accel" +#define LIS3L02DQ_ACCEL_DEV_NAME "lis3l02dq" /** 
* struct st_sensors_platform_data - default accel platform data diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 4d95bfc4786c..da3fb069ec5c 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -215,6 +215,22 @@ #define ST_ACCEL_6_IHL_IRQ_MASK 0x80 #define ST_ACCEL_6_MULTIREAD_BIT true +/* CUSTOM VALUES FOR SENSOR 7 */ +#define ST_ACCEL_7_ODR_ADDR 0x20 +#define ST_ACCEL_7_ODR_MASK 0x30 +#define ST_ACCEL_7_ODR_AVL_280HZ_VAL 0x00 +#define ST_ACCEL_7_ODR_AVL_560HZ_VAL 0x01 +#define ST_ACCEL_7_ODR_AVL_1120HZ_VAL 0x02 +#define ST_ACCEL_7_ODR_AVL_4480HZ_VAL 0x03 +#define ST_ACCEL_7_PW_ADDR 0x20 +#define ST_ACCEL_7_PW_MASK 0xc0 +#define ST_ACCEL_7_FS_AVL_2_GAIN IIO_G_TO_M_S_2(488) +#define ST_ACCEL_7_BDU_ADDR 0x21 +#define ST_ACCEL_7_BDU_MASK 0x40 +#define ST_ACCEL_7_DRDY_IRQ_ADDR 0x21 +#define ST_ACCEL_7_DRDY_IRQ_INT1_MASK 0x04 +#define ST_ACCEL_7_MULTIREAD_BIT false + static const struct iio_chan_spec st_accel_8bit_channels[] = { ST_SENSORS_LSM_CHANNELS(IIO_ACCEL, BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), @@ -662,6 +678,54 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .multi_read_bit = ST_ACCEL_6_MULTIREAD_BIT, .bootime = 2, }, + { + /* No WAI register present */ + .sensors_supported = { + [0] = LIS3L02DQ_ACCEL_DEV_NAME, + }, + .ch = (struct iio_chan_spec *)st_accel_12bit_channels, + .odr = { + .addr = ST_ACCEL_7_ODR_ADDR, + .mask = ST_ACCEL_7_ODR_MASK, + .odr_avl = { + { 280, ST_ACCEL_7_ODR_AVL_280HZ_VAL, }, + { 560, ST_ACCEL_7_ODR_AVL_560HZ_VAL, }, + { 1120, ST_ACCEL_7_ODR_AVL_1120HZ_VAL, }, + { 4480, ST_ACCEL_7_ODR_AVL_4480HZ_VAL, }, + }, + }, + .pw = { + .addr = ST_ACCEL_7_PW_ADDR, + .mask = ST_ACCEL_7_PW_MASK, + .value_on = ST_SENSORS_DEFAULT_POWER_ON_VALUE, + .value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE, + }, + .enable_axis = { + .addr = ST_SENSORS_DEFAULT_AXIS_ADDR, + .mask = ST_SENSORS_DEFAULT_AXIS_MASK, + }, + .fs = { + .fs_avl = { + [0] = { + .num = ST_ACCEL_FS_AVL_2G, + .gain = ST_ACCEL_7_FS_AVL_2_GAIN, + }, + }, + }, + /* + * The part has a BDU bit but if set the data is never + * updated so don't set it. 
+	 */
+	.bdu = {
+	},
+	.drdy_irq = {
+		.addr = ST_ACCEL_7_DRDY_IRQ_ADDR,
+		.mask_int1 = ST_ACCEL_7_DRDY_IRQ_INT1_MASK,
+		.addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
+	},
+	.multi_read_bit = ST_ACCEL_7_MULTIREAD_BIT,
+	.bootime = 2,
+	},
 };

 static int st_accel_read_raw(struct iio_dev *indio_dev,
@@ -758,13 +822,15 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
	indio_dev->info = &accel_info;
	mutex_init(&adata->tb.buf_lock);

-	st_sensors_power_enable(indio_dev);
+	err = st_sensors_power_enable(indio_dev);
+	if (err)
+		return err;

	err = st_sensors_check_device_support(indio_dev,
					ARRAY_SIZE(st_accel_sensors_settings),
					st_accel_sensors_settings);
	if (err < 0)
-		return err;
+		goto st_accel_power_off;

	adata->num_data_channels = ST_ACCEL_NUMBER_DATA_CHANNELS;
	adata->multiread_bit = adata->sensor_settings->multi_read_bit;
@@ -781,11 +847,11 @@ int st_accel_common_probe(struct iio_dev *indio_dev)

	err = st_sensors_init_sensor(indio_dev, adata->dev->platform_data);
	if (err < 0)
-		return err;
+		goto st_accel_power_off;

	err = st_accel_allocate_ring(indio_dev);
	if (err < 0)
-		return err;
+		goto st_accel_power_off;

	if (irq > 0) {
		err = st_sensors_allocate_trigger(indio_dev,
@@ -808,6 +874,8 @@ st_accel_device_register_error:
	st_sensors_deallocate_trigger(indio_dev);
st_accel_probe_trigger_error:
	st_accel_deallocate_ring(indio_dev);
+st_accel_power_off:
+	st_sensors_power_disable(indio_dev);

	return err;
 }
diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c
index 7333ee9fb11b..e9d427a5df7c 100644
--- a/drivers/iio/accel/st_accel_i2c.c
+++ b/drivers/iio/accel/st_accel_i2c.c
@@ -80,6 +80,10 @@ static const struct of_device_id st_accel_of_match[] = {
		.compatible = "st,h3lis331dl-accel",
		.data = H3LIS331DL_DRIVER_NAME,
	},
+	{
+		.compatible = "st,lis3l02dq",
+		.data = LIS3L02DQ_ACCEL_DEV_NAME,
+	},
	{},
 };
 MODULE_DEVICE_TABLE(of, st_accel_of_match);
@@ -130,6 +134,7 @@ static const struct i2c_device_id st_accel_id_table[] = {
	{ LSM330_ACCEL_DEV_NAME },
	{ LSM303AGR_ACCEL_DEV_NAME },
	{ LIS2DH12_ACCEL_DEV_NAME },
+	{ LIS3L02DQ_ACCEL_DEV_NAME },
	{},
 };
 MODULE_DEVICE_TABLE(i2c, st_accel_id_table);
diff --git a/drivers/iio/accel/st_accel_spi.c b/drivers/iio/accel/st_accel_spi.c
index fcd5847a3fd3..efd43941d45d 100644
--- a/drivers/iio/accel/st_accel_spi.c
+++ b/drivers/iio/accel/st_accel_spi.c
@@ -59,6 +59,7 @@ static const struct spi_device_id st_accel_id_table[] = {
	{ LSM330_ACCEL_DEV_NAME },
	{ LSM303AGR_ACCEL_DEV_NAME },
	{ LIS2DH12_ACCEL_DEV_NAME },
+	{ LIS3L02DQ_ACCEL_DEV_NAME },
	{},
 };
 MODULE_DEVICE_TABLE(spi, st_accel_id_table);
diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 25378c5882e2..1de31bdd4ce4 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -153,6 +153,18 @@ config AXP288_ADC
	  To compile this driver as a module, choose M here: the module will be
	  called axp288_adc.

+config BCM_IPROC_ADC
+	tristate "Broadcom IPROC ADC driver"
+	depends on ARCH_BCM_IPROC || COMPILE_TEST
+	depends on MFD_SYSCON
+	default ARCH_BCM_CYGNUS
+	help
+	  Say Y here if you want to add support for the Broadcom static
+	  ADC driver.
+
+	  Broadcom iProc ADC driver. Broadcom iProc ADC controller has 8
+	  channels. The driver allows the user to read voltage values.
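The st_accel_common_probe() hunks above turn the previously unchecked
st_sensors_power_enable() call into a checked one and reroute every later
failure through the new st_accel_power_off label, so the regulator is
always released on error. A minimal standalone sketch of that goto-unwind
idiom; the demo_* names are hypothetical stand-ins for the real
st_sensors_* helpers:

	#include <errno.h>

	struct demo_dev {
		int powered;
	};

	/* stand-in for st_sensors_power_enable() */
	static int demo_power_on(struct demo_dev *d)
	{
		d->powered = 1;
		return 0;
	}

	/* stand-in for st_sensors_power_disable() */
	static void demo_power_off(struct demo_dev *d)
	{
		d->powered = 0;
	}

	/* stand-in for the later setup steps that may fail */
	static int demo_setup(struct demo_dev *d)
	{
		return d->powered ? 0 : -EINVAL;
	}

	int demo_probe(struct demo_dev *d)
	{
		int err;

		err = demo_power_on(d);
		if (err)
			return err;	/* nothing acquired yet: plain return */

		err = demo_setup(d);
		if (err)
			goto power_off;	/* power is on: unwind via the label */

		return 0;

	power_off:
		demo_power_off(d);
		return err;
	}

The same shape scales to several resources: each acquisition adds one label,
and failures jump to the label that releases everything acquired so far.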
+ config BERLIN2_ADC tristate "Marvell Berlin2 ADC driver" depends on ARCH_BERLIN diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index 38638d46f972..0ba0d500eedb 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_AD799X) += ad799x.o obj-$(CONFIG_AT91_ADC) += at91_adc.o obj-$(CONFIG_AT91_SAMA5D2_ADC) += at91-sama5d2_adc.o obj-$(CONFIG_AXP288_ADC) += axp288_adc.o +obj-$(CONFIG_BCM_IPROC_ADC) += bcm_iproc_adc.o obj-$(CONFIG_BERLIN2_ADC) += berlin2-adc.o obj-$(CONFIG_CC10001_ADC) += cc10001_adc.o obj-$(CONFIG_DA9150_GPADC) += da9150-gpadc.o diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 21e19b60e2b9..c0f6a98fd9bd 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -154,12 +154,11 @@ static int ad7266_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - if (iio_buffer_enabled(indio_dev)) - return -EBUSY; - - ret = ad7266_read_single(st, val, chan->address); + ret = iio_device_claim_direct_mode(indio_dev); if (ret) return ret; + ret = ad7266_read_single(st, val, chan->address); + iio_device_release_direct_mode(indio_dev); *val = (*val >> 2) & 0xfff; if (chan->scan_type.sign == 's') @@ -396,8 +395,8 @@ static int ad7266_probe(struct spi_device *spi) st = iio_priv(indio_dev); - st->reg = devm_regulator_get(&spi->dev, "vref"); - if (!IS_ERR_OR_NULL(st->reg)) { + st->reg = devm_regulator_get_optional(&spi->dev, "vref"); + if (!IS_ERR(st->reg)) { ret = regulator_enable(st->reg); if (ret) return ret; @@ -408,6 +407,9 @@ static int ad7266_probe(struct spi_device *spi) st->vref_mv = ret / 1000; } else { + /* Any other error indicates that the regulator does exist */ + if (PTR_ERR(st->reg) != -ENODEV) + return PTR_ERR(st->reg); /* Use internal reference */ st->vref_mv = 2500; } @@ -438,6 +440,7 @@ static int ad7266_probe(struct spi_device *spi) st->spi = spi; indio_dev->dev.parent = &spi->dev; + indio_dev->dev.of_node = spi->dev.of_node; indio_dev->name = spi_get_device_id(spi)->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &ad7266_info; diff --git a/drivers/iio/adc/ad7291.c b/drivers/iio/adc/ad7291.c index c0eabf156702..1d90b02732bb 100644 --- a/drivers/iio/adc/ad7291.c +++ b/drivers/iio/adc/ad7291.c @@ -115,7 +115,7 @@ static irqreturn_t ad7291_event_handler(int irq, void *private) u16 t_status, v_status; u16 command; int i; - s64 timestamp = iio_get_time_ns(); + s64 timestamp = iio_get_time_ns(indio_dev); if (ad7291_i2c_read(chip, AD7291_T_ALERT_STATUS, &t_status)) return IRQ_HANDLED; @@ -505,6 +505,7 @@ static int ad7291_probe(struct i2c_client *client, indio_dev->num_channels = ARRAY_SIZE(ad7291_channels); indio_dev->dev.parent = &client->dev; + indio_dev->dev.of_node = client->dev.of_node; indio_dev->info = &ad7291_info; indio_dev->modes = INDIO_DIRECT_MODE; diff --git a/drivers/iio/adc/ad7298.c b/drivers/iio/adc/ad7298.c index 62bb8f7ce4a0..10ec8fce395f 100644 --- a/drivers/iio/adc/ad7298.c +++ b/drivers/iio/adc/ad7298.c @@ -163,7 +163,7 @@ static irqreturn_t ad7298_trigger_handler(int irq, void *p) goto done; iio_push_to_buffers_with_timestamp(indio_dev, st->rx_buf, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); @@ -315,6 +315,7 @@ static int ad7298_probe(struct spi_device *spi) indio_dev->name = spi_get_device_id(spi)->name; indio_dev->dev.parent = &spi->dev; + indio_dev->dev.of_node = spi->dev.of_node; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = ad7298_channels; 
indio_dev->num_channels = ARRAY_SIZE(ad7298_channels); diff --git a/drivers/iio/adc/ad7476.c b/drivers/iio/adc/ad7476.c index be85c2a0ad97..b7ecf9aab90f 100644 --- a/drivers/iio/adc/ad7476.c +++ b/drivers/iio/adc/ad7476.c @@ -70,7 +70,7 @@ static irqreturn_t ad7476_trigger_handler(int irq, void *p) goto done; iio_push_to_buffers_with_timestamp(indio_dev, st->data, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); @@ -106,12 +106,11 @@ static int ad7476_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - mutex_lock(&indio_dev->mlock); - if (iio_buffer_enabled(indio_dev)) - ret = -EBUSY; - else - ret = ad7476_scan_direct(st); - mutex_unlock(&indio_dev->mlock); + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + ret = ad7476_scan_direct(st); + iio_device_release_direct_mode(indio_dev); if (ret < 0) return ret; @@ -228,6 +227,7 @@ static int ad7476_probe(struct spi_device *spi) /* Establish that the iio_dev is a child of the spi device */ indio_dev->dev.parent = &spi->dev; + indio_dev->dev.of_node = spi->dev.of_node; indio_dev->name = spi_get_device_id(spi)->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = st->chip_info->channel; diff --git a/drivers/iio/adc/ad7791.c b/drivers/iio/adc/ad7791.c index cf172d58cd44..1817ebf5ad84 100644 --- a/drivers/iio/adc/ad7791.c +++ b/drivers/iio/adc/ad7791.c @@ -272,30 +272,22 @@ static ssize_t ad7791_write_frequency(struct device *dev, struct ad7791_state *st = iio_priv(indio_dev); int i, ret; - mutex_lock(&indio_dev->mlock); - if (iio_buffer_enabled(indio_dev)) { - mutex_unlock(&indio_dev->mlock); - return -EBUSY; - } - mutex_unlock(&indio_dev->mlock); - - ret = -EINVAL; - - for (i = 0; i < ARRAY_SIZE(ad7791_sample_freq_avail); i++) { - if (sysfs_streq(ad7791_sample_freq_avail[i], buf)) { - - mutex_lock(&indio_dev->mlock); - st->filter &= ~AD7791_FILTER_RATE_MASK; - st->filter |= i; - ad_sd_write_reg(&st->sd, AD7791_REG_FILTER, - sizeof(st->filter), st->filter); - mutex_unlock(&indio_dev->mlock); - ret = 0; + for (i = 0; i < ARRAY_SIZE(ad7791_sample_freq_avail); i++) + if (sysfs_streq(ad7791_sample_freq_avail[i], buf)) break; - } - } + if (i == ARRAY_SIZE(ad7791_sample_freq_avail)) + return -EINVAL; + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + st->filter &= ~AD7791_FILTER_RATE_MASK; + st->filter |= i; + ad_sd_write_reg(&st->sd, AD7791_REG_FILTER, sizeof(st->filter), + st->filter); + iio_device_release_direct_mode(indio_dev); - return ret ? 
ret : len; + return len; } static IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO, @@ -383,6 +375,7 @@ static int ad7791_probe(struct spi_device *spi) spi_set_drvdata(spi, indio_dev); indio_dev->dev.parent = &spi->dev; + indio_dev->dev.of_node = spi->dev.of_node; indio_dev->name = spi_get_device_id(spi)->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = st->info->channels; diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index 7b07bb651671..847789bae821 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -369,13 +369,6 @@ static ssize_t ad7793_write_frequency(struct device *dev, long lval; int i, ret; - mutex_lock(&indio_dev->mlock); - if (iio_buffer_enabled(indio_dev)) { - mutex_unlock(&indio_dev->mlock); - return -EBUSY; - } - mutex_unlock(&indio_dev->mlock); - ret = kstrtol(buf, 10, &lval); if (ret) return ret; @@ -383,20 +376,21 @@ static ssize_t ad7793_write_frequency(struct device *dev, if (lval == 0) return -EINVAL; - ret = -EINVAL; - for (i = 0; i < 16; i++) - if (lval == st->chip_info->sample_freq_avail[i]) { - mutex_lock(&indio_dev->mlock); - st->mode &= ~AD7793_MODE_RATE(-1); - st->mode |= AD7793_MODE_RATE(i); - ad_sd_write_reg(&st->sd, AD7793_REG_MODE, - sizeof(st->mode), st->mode); - mutex_unlock(&indio_dev->mlock); - ret = 0; - } + if (lval == st->chip_info->sample_freq_avail[i]) + break; + if (i == 16) + return -EINVAL; - return ret ? ret : len; + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + st->mode &= ~AD7793_MODE_RATE(-1); + st->mode |= AD7793_MODE_RATE(i); + ad_sd_write_reg(&st->sd, AD7793_REG_MODE, sizeof(st->mode), st->mode); + iio_device_release_direct_mode(indio_dev); + + return len; } static IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO, @@ -790,6 +784,7 @@ static int ad7793_probe(struct spi_device *spi) spi_set_drvdata(spi, indio_dev); indio_dev->dev.parent = &spi->dev; + indio_dev->dev.of_node = spi->dev.of_node; indio_dev->name = spi_get_device_id(spi)->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = st->chip_info->channels; diff --git a/drivers/iio/adc/ad7887.c b/drivers/iio/adc/ad7887.c index 2d3c397e66ad..7a483bfbd70c 100644 --- a/drivers/iio/adc/ad7887.c +++ b/drivers/iio/adc/ad7887.c @@ -122,7 +122,7 @@ static irqreturn_t ad7887_trigger_handler(int irq, void *p) goto done; iio_push_to_buffers_with_timestamp(indio_dev, st->data, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); @@ -156,12 +156,11 @@ static int ad7887_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - mutex_lock(&indio_dev->mlock); - if (iio_buffer_enabled(indio_dev)) - ret = -EBUSY; - else - ret = ad7887_scan_direct(st, chan->address); - mutex_unlock(&indio_dev->mlock); + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + ret = ad7887_scan_direct(st, chan->address); + iio_device_release_direct_mode(indio_dev); if (ret < 0) return ret; @@ -265,6 +264,7 @@ static int ad7887_probe(struct spi_device *spi) /* Estabilish that the iio_dev is a child of the spi device */ indio_dev->dev.parent = &spi->dev; + indio_dev->dev.of_node = spi->dev.of_node; indio_dev->name = spi_get_device_id(spi)->name; indio_dev->info = &ad7887_info; indio_dev->modes = INDIO_DIRECT_MODE; diff --git a/drivers/iio/adc/ad7923.c b/drivers/iio/adc/ad7923.c index 45e29ccd824f..77a675e11ebb 100644 --- a/drivers/iio/adc/ad7923.c +++ b/drivers/iio/adc/ad7923.c @@ -181,7 +181,7 @@ static irqreturn_t ad7923_trigger_handler(int irq, void *p) goto done; 
iio_push_to_buffers_with_timestamp(indio_dev, st->rx_buf, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); @@ -233,12 +233,11 @@ static int ad7923_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - mutex_lock(&indio_dev->mlock); - if (iio_buffer_enabled(indio_dev)) - ret = -EBUSY; - else - ret = ad7923_scan_direct(st, chan->address); - mutex_unlock(&indio_dev->mlock); + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + ret = ad7923_scan_direct(st, chan->address); + iio_device_release_direct_mode(indio_dev); if (ret < 0) return ret; @@ -289,6 +288,7 @@ static int ad7923_probe(struct spi_device *spi) indio_dev->name = spi_get_device_id(spi)->name; indio_dev->dev.parent = &spi->dev; + indio_dev->dev.of_node = spi->dev.of_node; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = info->channels; indio_dev->num_channels = info->num_channels; diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c index a3f5254f4e51..b6163764489c 100644 --- a/drivers/iio/adc/ad799x.c +++ b/drivers/iio/adc/ad799x.c @@ -212,7 +212,7 @@ static irqreturn_t ad799x_trigger_handler(int irq, void *p) goto out; iio_push_to_buffers_with_timestamp(indio_dev, st->rx_buf, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); out: iio_trigger_notify_done(indio_dev->trig); @@ -282,12 +282,11 @@ static int ad799x_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - mutex_lock(&indio_dev->mlock); - if (iio_buffer_enabled(indio_dev)) - ret = -EBUSY; - else - ret = ad799x_scan_direct(st, chan->scan_index); - mutex_unlock(&indio_dev->mlock); + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + ret = ad799x_scan_direct(st, chan->scan_index); + iio_device_release_direct_mode(indio_dev); if (ret < 0) return ret; @@ -395,11 +394,9 @@ static int ad799x_write_event_config(struct iio_dev *indio_dev, struct ad799x_state *st = iio_priv(indio_dev); int ret; - mutex_lock(&indio_dev->mlock); - if (iio_buffer_enabled(indio_dev)) { - ret = -EBUSY; - goto done; - } + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; if (state) st->config |= BIT(chan->scan_index) << AD799X_CHANNEL_SHIFT; @@ -412,10 +409,7 @@ static int ad799x_write_event_config(struct iio_dev *indio_dev, st->config &= ~AD7998_ALERT_EN; ret = ad799x_write_config(st, st->config); - -done: - mutex_unlock(&indio_dev->mlock); - + iio_device_release_direct_mode(indio_dev); return ret; } @@ -508,7 +502,7 @@ static irqreturn_t ad799x_event_handler(int irq, void *private) (i >> 1), IIO_EV_TYPE_THRESH, IIO_EV_DIR_FALLING), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); } done: @@ -812,6 +806,7 @@ static int ad799x_probe(struct i2c_client *client, st->client = client; indio_dev->dev.parent = &client->dev; + indio_dev->dev.of_node = client->dev.of_node; indio_dev->name = id->name; indio_dev->info = st->chip_config->info; diff --git a/drivers/iio/adc/bcm_iproc_adc.c b/drivers/iio/adc/bcm_iproc_adc.c new file mode 100644 index 000000000000..21d38c8af21e --- /dev/null +++ b/drivers/iio/adc/bcm_iproc_adc.c @@ -0,0 +1,644 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 (GPLv2) for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Below Register's are common to IPROC ADC and Touchscreen IP */ +#define IPROC_REGCTL1 0x00 +#define IPROC_REGCTL2 0x04 +#define IPROC_INTERRUPT_THRES 0x08 +#define IPROC_INTERRUPT_MASK 0x0c +#define IPROC_INTERRUPT_STATUS 0x10 +#define IPROC_ANALOG_CONTROL 0x1c +#define IPROC_CONTROLLER_STATUS 0x14 +#define IPROC_AUX_DATA 0x20 +#define IPROC_SOFT_BYPASS_CONTROL 0x38 +#define IPROC_SOFT_BYPASS_DATA 0x3C + +/* IPROC ADC Channel register offsets */ +#define IPROC_ADC_CHANNEL_REGCTL1 0x800 +#define IPROC_ADC_CHANNEL_REGCTL2 0x804 +#define IPROC_ADC_CHANNEL_STATUS 0x808 +#define IPROC_ADC_CHANNEL_INTERRUPT_STATUS 0x80c +#define IPROC_ADC_CHANNEL_INTERRUPT_MASK 0x810 +#define IPROC_ADC_CHANNEL_DATA 0x814 +#define IPROC_ADC_CHANNEL_OFFSET 0x20 + +/* Bit definitions for IPROC_REGCTL2 */ +#define IPROC_ADC_AUXIN_SCAN_ENA BIT(0) +#define IPROC_ADC_PWR_LDO BIT(5) +#define IPROC_ADC_PWR_ADC BIT(4) +#define IPROC_ADC_PWR_BG BIT(3) +#define IPROC_ADC_CONTROLLER_EN BIT(17) + +/* Bit definitions for IPROC_INTERRUPT_MASK and IPROC_INTERRUPT_STATUS */ +#define IPROC_ADC_AUXDATA_RDY_INTR BIT(3) +#define IPROC_ADC_INTR 9 +#define IPROC_ADC_INTR_MASK (0xFF << IPROC_ADC_INTR) + +/* Bit definitions for IPROC_ANALOG_CONTROL */ +#define IPROC_ADC_CHANNEL_SEL 11 +#define IPROC_ADC_CHANNEL_SEL_MASK (0x7 << IPROC_ADC_CHANNEL_SEL) + +/* Bit definitions for IPROC_ADC_CHANNEL_REGCTL1 */ +#define IPROC_ADC_CHANNEL_ROUNDS 0x2 +#define IPROC_ADC_CHANNEL_ROUNDS_MASK (0x3F << IPROC_ADC_CHANNEL_ROUNDS) +#define IPROC_ADC_CHANNEL_MODE 0x1 +#define IPROC_ADC_CHANNEL_MODE_MASK (0x1 << IPROC_ADC_CHANNEL_MODE) +#define IPROC_ADC_CHANNEL_MODE_TDM 0x1 +#define IPROC_ADC_CHANNEL_MODE_SNAPSHOT 0x0 +#define IPROC_ADC_CHANNEL_ENABLE 0x0 +#define IPROC_ADC_CHANNEL_ENABLE_MASK 0x1 + +/* Bit definitions for IPROC_ADC_CHANNEL_REGCTL2 */ +#define IPROC_ADC_CHANNEL_WATERMARK 0x0 +#define IPROC_ADC_CHANNEL_WATERMARK_MASK \ + (0x3F << IPROC_ADC_CHANNEL_WATERMARK) + +#define IPROC_ADC_WATER_MARK_LEVEL 0x1 + +/* Bit definitions for IPROC_ADC_CHANNEL_STATUS */ +#define IPROC_ADC_CHANNEL_DATA_LOST 0x0 +#define IPROC_ADC_CHANNEL_DATA_LOST_MASK \ + (0x0 << IPROC_ADC_CHANNEL_DATA_LOST) +#define IPROC_ADC_CHANNEL_VALID_ENTERIES 0x1 +#define IPROC_ADC_CHANNEL_VALID_ENTERIES_MASK \ + (0xFF << IPROC_ADC_CHANNEL_VALID_ENTERIES) +#define IPROC_ADC_CHANNEL_TOTAL_ENTERIES 0x9 +#define IPROC_ADC_CHANNEL_TOTAL_ENTERIES_MASK \ + (0xFF << IPROC_ADC_CHANNEL_TOTAL_ENTERIES) + +/* Bit definitions for IPROC_ADC_CHANNEL_INTERRUPT_MASK */ +#define IPROC_ADC_CHANNEL_WTRMRK_INTR 0x0 +#define IPROC_ADC_CHANNEL_WTRMRK_INTR_MASK \ + (0x1 << IPROC_ADC_CHANNEL_WTRMRK_INTR) +#define IPROC_ADC_CHANNEL_FULL_INTR 0x1 +#define IPROC_ADC_CHANNEL_FULL_INTR_MASK \ + (0x1 << IPROC_ADC_IPROC_ADC_CHANNEL_FULL_INTR) +#define IPROC_ADC_CHANNEL_EMPTY_INTR 0x2 +#define IPROC_ADC_CHANNEL_EMPTY_INTR_MASK \ + (0x1 << IPROC_ADC_CHANNEL_EMPTY_INTR) + +#define IPROC_ADC_WATER_MARK_INTR_ENABLE 0x1 + +/* Number of time to retry a set of the interrupt mask reg */ +#define 
IPROC_ADC_INTMASK_RETRY_ATTEMPTS 10 + +#define IPROC_ADC_READ_TIMEOUT (HZ*2) + +#define iproc_adc_dbg_reg(dev, priv, reg) \ +do { \ + u32 val; \ + regmap_read(priv->regmap, reg, &val); \ + dev_dbg(dev, "%20s= 0x%08x\n", #reg, val); \ +} while (0) + +struct iproc_adc_priv { + struct regmap *regmap; + struct clk *adc_clk; + struct mutex mutex; + int irqno; + int chan_val; + int chan_id; + struct completion completion; +}; + +static void iproc_adc_reg_dump(struct iio_dev *indio_dev) +{ + struct device *dev = &indio_dev->dev; + struct iproc_adc_priv *adc_priv = iio_priv(indio_dev); + + iproc_adc_dbg_reg(dev, adc_priv, IPROC_REGCTL1); + iproc_adc_dbg_reg(dev, adc_priv, IPROC_REGCTL2); + iproc_adc_dbg_reg(dev, adc_priv, IPROC_INTERRUPT_THRES); + iproc_adc_dbg_reg(dev, adc_priv, IPROC_INTERRUPT_MASK); + iproc_adc_dbg_reg(dev, adc_priv, IPROC_INTERRUPT_STATUS); + iproc_adc_dbg_reg(dev, adc_priv, IPROC_CONTROLLER_STATUS); + iproc_adc_dbg_reg(dev, adc_priv, IPROC_ANALOG_CONTROL); + iproc_adc_dbg_reg(dev, adc_priv, IPROC_AUX_DATA); + iproc_adc_dbg_reg(dev, adc_priv, IPROC_SOFT_BYPASS_CONTROL); + iproc_adc_dbg_reg(dev, adc_priv, IPROC_SOFT_BYPASS_DATA); +} + +static irqreturn_t iproc_adc_interrupt_handler(int irq, void *data) +{ + u32 channel_intr_status; + u32 intr_status; + u32 intr_mask; + struct iio_dev *indio_dev = data; + struct iproc_adc_priv *adc_priv = iio_priv(indio_dev); + + /* + * This interrupt is shared with the touchscreen driver. + * Make sure this interrupt is intended for us. + * Handle only ADC channel specific interrupts. + */ + regmap_read(adc_priv->regmap, IPROC_INTERRUPT_STATUS, &intr_status); + regmap_read(adc_priv->regmap, IPROC_INTERRUPT_MASK, &intr_mask); + intr_status = intr_status & intr_mask; + channel_intr_status = (intr_status & IPROC_ADC_INTR_MASK) >> + IPROC_ADC_INTR; + if (channel_intr_status) + return IRQ_WAKE_THREAD; + + return IRQ_NONE; +} + +static irqreturn_t iproc_adc_interrupt_thread(int irq, void *data) +{ + irqreturn_t retval = IRQ_NONE; + struct iproc_adc_priv *adc_priv; + struct iio_dev *indio_dev = data; + unsigned int valid_entries; + u32 intr_status; + u32 intr_channels; + u32 channel_status; + u32 ch_intr_status; + + adc_priv = iio_priv(indio_dev); + + regmap_read(adc_priv->regmap, IPROC_INTERRUPT_STATUS, &intr_status); + dev_dbg(&indio_dev->dev, "iproc_adc_interrupt_thread(),INTRPT_STS:%x\n", + intr_status); + + intr_channels = (intr_status & IPROC_ADC_INTR_MASK) >> IPROC_ADC_INTR; + if (intr_channels) { + regmap_read(adc_priv->regmap, + IPROC_ADC_CHANNEL_INTERRUPT_STATUS + + IPROC_ADC_CHANNEL_OFFSET * adc_priv->chan_id, + &ch_intr_status); + + if (ch_intr_status & IPROC_ADC_CHANNEL_WTRMRK_INTR_MASK) { + regmap_read(adc_priv->regmap, + IPROC_ADC_CHANNEL_STATUS + + IPROC_ADC_CHANNEL_OFFSET * + adc_priv->chan_id, + &channel_status); + + valid_entries = ((channel_status & + IPROC_ADC_CHANNEL_VALID_ENTERIES_MASK) >> + IPROC_ADC_CHANNEL_VALID_ENTERIES); + if (valid_entries >= 1) { + regmap_read(adc_priv->regmap, + IPROC_ADC_CHANNEL_DATA + + IPROC_ADC_CHANNEL_OFFSET * + adc_priv->chan_id, + &adc_priv->chan_val); + complete(&adc_priv->completion); + } else { + dev_err(&indio_dev->dev, + "No data rcvd on channel %d\n", + adc_priv->chan_id); + } + regmap_write(adc_priv->regmap, + IPROC_ADC_CHANNEL_INTERRUPT_MASK + + IPROC_ADC_CHANNEL_OFFSET * + adc_priv->chan_id, + (ch_intr_status & + ~(IPROC_ADC_CHANNEL_WTRMRK_INTR_MASK))); + } + regmap_write(adc_priv->regmap, + IPROC_ADC_CHANNEL_INTERRUPT_STATUS + + IPROC_ADC_CHANNEL_OFFSET * adc_priv->chan_id, + 
ch_intr_status); + regmap_write(adc_priv->regmap, IPROC_INTERRUPT_STATUS, + intr_channels); + retval = IRQ_HANDLED; + } + + return retval; +} + +static int iproc_adc_do_read(struct iio_dev *indio_dev, + int channel, + u16 *p_adc_data) +{ + int read_len = 0; + u32 val; + u32 mask; + u32 val_check; + int failed_cnt = 0; + struct iproc_adc_priv *adc_priv = iio_priv(indio_dev); + + mutex_lock(&adc_priv->mutex); + + /* + * After a read is complete the ADC interrupts will be disabled so + * we can assume this section of code is safe from interrupts. + */ + adc_priv->chan_val = -1; + adc_priv->chan_id = channel; + + reinit_completion(&adc_priv->completion); + /* Clear any pending interrupt */ + regmap_update_bits(adc_priv->regmap, IPROC_INTERRUPT_STATUS, + IPROC_ADC_INTR_MASK | IPROC_ADC_AUXDATA_RDY_INTR, + ((0x0 << channel) << IPROC_ADC_INTR) | + IPROC_ADC_AUXDATA_RDY_INTR); + + /* Configure channel for snapshot mode and enable */ + val = (BIT(IPROC_ADC_CHANNEL_ROUNDS) | + (IPROC_ADC_CHANNEL_MODE_SNAPSHOT << IPROC_ADC_CHANNEL_MODE) | + (0x1 << IPROC_ADC_CHANNEL_ENABLE)); + + mask = IPROC_ADC_CHANNEL_ROUNDS_MASK | IPROC_ADC_CHANNEL_MODE_MASK | + IPROC_ADC_CHANNEL_ENABLE_MASK; + regmap_update_bits(adc_priv->regmap, (IPROC_ADC_CHANNEL_REGCTL1 + + IPROC_ADC_CHANNEL_OFFSET * channel), + mask, val); + + /* Set the Watermark for a channel */ + regmap_update_bits(adc_priv->regmap, (IPROC_ADC_CHANNEL_REGCTL2 + + IPROC_ADC_CHANNEL_OFFSET * channel), + IPROC_ADC_CHANNEL_WATERMARK_MASK, + 0x1); + + /* Enable water mark interrupt */ + regmap_update_bits(adc_priv->regmap, (IPROC_ADC_CHANNEL_INTERRUPT_MASK + + IPROC_ADC_CHANNEL_OFFSET * + channel), + IPROC_ADC_CHANNEL_WTRMRK_INTR_MASK, + IPROC_ADC_WATER_MARK_INTR_ENABLE); + regmap_read(adc_priv->regmap, IPROC_INTERRUPT_MASK, &val); + + /* Enable ADC interrupt for a channel */ + val |= (BIT(channel) << IPROC_ADC_INTR); + regmap_write(adc_priv->regmap, IPROC_INTERRUPT_MASK, val); + + /* + * There seems to be a very rare issue where writing to this register + * does not take effect. To work around the issue we will try multiple + * writes. In total we will spend about 10*10 = 100 us attempting this. + * Testing has shown that this may loop a few time, but we have never + * hit the full count. + */ + regmap_read(adc_priv->regmap, IPROC_INTERRUPT_MASK, &val_check); + while (val_check != val) { + failed_cnt++; + + if (failed_cnt > IPROC_ADC_INTMASK_RETRY_ATTEMPTS) + break; + + udelay(10); + regmap_update_bits(adc_priv->regmap, IPROC_INTERRUPT_MASK, + IPROC_ADC_INTR_MASK, + ((0x1 << channel) << + IPROC_ADC_INTR)); + + regmap_read(adc_priv->regmap, IPROC_INTERRUPT_MASK, &val_check); + } + + if (failed_cnt) { + dev_dbg(&indio_dev->dev, + "IntMask failed (%d times)", failed_cnt); + if (failed_cnt > IPROC_ADC_INTMASK_RETRY_ATTEMPTS) { + dev_err(&indio_dev->dev, + "IntMask set failed. Read will likely fail."); + read_len = -EIO; + goto adc_err; + }; + } + regmap_read(adc_priv->regmap, IPROC_INTERRUPT_MASK, &val_check); + + if (wait_for_completion_timeout(&adc_priv->completion, + IPROC_ADC_READ_TIMEOUT) > 0) { + + /* Only the lower 16 bits are relevant */ + *p_adc_data = adc_priv->chan_val & 0xFFFF; + read_len = sizeof(*p_adc_data); + + } else { + /* + * We never got the interrupt, something went wrong. + * Perhaps the interrupt may still be coming, we do not want + * that now. Lets disable the ADC interrupt, and clear the + * status to put it back in to normal state. 
+ */ + read_len = -ETIMEDOUT; + goto adc_err; + } + mutex_unlock(&adc_priv->mutex); + + return read_len; + +adc_err: + regmap_update_bits(adc_priv->regmap, IPROC_INTERRUPT_MASK, + IPROC_ADC_INTR_MASK, + ((0x0 << channel) << IPROC_ADC_INTR)); + + regmap_update_bits(adc_priv->regmap, IPROC_INTERRUPT_STATUS, + IPROC_ADC_INTR_MASK, + ((0x0 << channel) << IPROC_ADC_INTR)); + + dev_err(&indio_dev->dev, "Timed out waiting for ADC data!\n"); + iproc_adc_reg_dump(indio_dev); + mutex_unlock(&adc_priv->mutex); + + return read_len; +} + +static int iproc_adc_enable(struct iio_dev *indio_dev) +{ + u32 val; + u32 channel_id; + struct iproc_adc_priv *adc_priv = iio_priv(indio_dev); + int ret; + + /* Set i_amux = 3b'000, select channel 0 */ + ret = regmap_update_bits(adc_priv->regmap, IPROC_ANALOG_CONTROL, + IPROC_ADC_CHANNEL_SEL_MASK, 0); + if (ret) { + dev_err(&indio_dev->dev, + "failed to write IPROC_ANALOG_CONTROL %d\n", ret); + return ret; + } + adc_priv->chan_val = -1; + + /* + * PWR up LDO, ADC, and Band Gap (0 to enable) + * Also enable ADC controller (set high) + */ + ret = regmap_read(adc_priv->regmap, IPROC_REGCTL2, &val); + if (ret) { + dev_err(&indio_dev->dev, + "failed to read IPROC_REGCTL2 %d\n", ret); + return ret; + } + + val &= ~(IPROC_ADC_PWR_LDO | IPROC_ADC_PWR_ADC | IPROC_ADC_PWR_BG); + + ret = regmap_write(adc_priv->regmap, IPROC_REGCTL2, val); + if (ret) { + dev_err(&indio_dev->dev, + "failed to write IPROC_REGCTL2 %d\n", ret); + return ret; + } + + ret = regmap_read(adc_priv->regmap, IPROC_REGCTL2, &val); + if (ret) { + dev_err(&indio_dev->dev, + "failed to read IPROC_REGCTL2 %d\n", ret); + return ret; + } + + val |= IPROC_ADC_CONTROLLER_EN; + ret = regmap_write(adc_priv->regmap, IPROC_REGCTL2, val); + if (ret) { + dev_err(&indio_dev->dev, + "failed to write IPROC_REGCTL2 %d\n", ret); + return ret; + } + + for (channel_id = 0; channel_id < indio_dev->num_channels; + channel_id++) { + ret = regmap_write(adc_priv->regmap, + IPROC_ADC_CHANNEL_INTERRUPT_MASK + + IPROC_ADC_CHANNEL_OFFSET * channel_id, 0); + if (ret) { + dev_err(&indio_dev->dev, + "failed to write ADC_CHANNEL_INTERRUPT_MASK %d\n", + ret); + return ret; + } + + ret = regmap_write(adc_priv->regmap, + IPROC_ADC_CHANNEL_INTERRUPT_STATUS + + IPROC_ADC_CHANNEL_OFFSET * channel_id, 0); + if (ret) { + dev_err(&indio_dev->dev, + "failed to write ADC_CHANNEL_INTERRUPT_STATUS %d\n", + ret); + return ret; + } + } + + return 0; +} + +static void iproc_adc_disable(struct iio_dev *indio_dev) +{ + u32 val; + int ret; + struct iproc_adc_priv *adc_priv = iio_priv(indio_dev); + + ret = regmap_read(adc_priv->regmap, IPROC_REGCTL2, &val); + if (ret) { + dev_err(&indio_dev->dev, + "failed to read IPROC_REGCTL2 %d\n", ret); + return; + } + + val &= ~IPROC_ADC_CONTROLLER_EN; + ret = regmap_write(adc_priv->regmap, IPROC_REGCTL2, val); + if (ret) { + dev_err(&indio_dev->dev, + "failed to write IPROC_REGCTL2 %d\n", ret); + return; + } +} + +static int iproc_adc_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, + int *val2, + long mask) +{ + u16 adc_data; + int err; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + err = iproc_adc_do_read(indio_dev, chan->channel, &adc_data); + if (err < 0) + return err; + *val = adc_data; + return IIO_VAL_INT; + case IIO_CHAN_INFO_SCALE: + switch (chan->type) { + case IIO_VOLTAGE: + *val = 1800; + *val2 = 10; + return IIO_VAL_FRACTIONAL_LOG2; + default: + return -EINVAL; + } + default: + return -EINVAL; + } +} + +static const struct iio_info iproc_adc_iio_info = { + .read_raw = 
&iproc_adc_read_raw, + .driver_module = THIS_MODULE, +}; + +#define IPROC_ADC_CHANNEL(_index, _id) { \ + .type = IIO_VOLTAGE, \ + .indexed = 1, \ + .channel = _index, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ + .datasheet_name = _id, \ +} + +static const struct iio_chan_spec iproc_adc_iio_channels[] = { + IPROC_ADC_CHANNEL(0, "adc0"), + IPROC_ADC_CHANNEL(1, "adc1"), + IPROC_ADC_CHANNEL(2, "adc2"), + IPROC_ADC_CHANNEL(3, "adc3"), + IPROC_ADC_CHANNEL(4, "adc4"), + IPROC_ADC_CHANNEL(5, "adc5"), + IPROC_ADC_CHANNEL(6, "adc6"), + IPROC_ADC_CHANNEL(7, "adc7"), +}; + +static int iproc_adc_probe(struct platform_device *pdev) +{ + struct iproc_adc_priv *adc_priv; + struct iio_dev *indio_dev = NULL; + int ret; + + indio_dev = devm_iio_device_alloc(&pdev->dev, + sizeof(*adc_priv)); + if (!indio_dev) { + dev_err(&pdev->dev, "failed to allocate iio device\n"); + return -ENOMEM; + } + + adc_priv = iio_priv(indio_dev); + platform_set_drvdata(pdev, indio_dev); + + mutex_init(&adc_priv->mutex); + + init_completion(&adc_priv->completion); + + adc_priv->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "adc-syscon"); + if (IS_ERR(adc_priv->regmap)) { + dev_err(&pdev->dev, "failed to get handle for tsc syscon\n"); + ret = PTR_ERR(adc_priv->regmap); + return ret; + } + + adc_priv->adc_clk = devm_clk_get(&pdev->dev, "tsc_clk"); + if (IS_ERR(adc_priv->adc_clk)) { + dev_err(&pdev->dev, + "failed getting clock tsc_clk\n"); + ret = PTR_ERR(adc_priv->adc_clk); + return ret; + } + + adc_priv->irqno = platform_get_irq(pdev, 0); + if (adc_priv->irqno <= 0) { + dev_err(&pdev->dev, "platform_get_irq failed\n"); + ret = -ENODEV; + return ret; + } + + ret = regmap_update_bits(adc_priv->regmap, IPROC_REGCTL2, + IPROC_ADC_AUXIN_SCAN_ENA, 0); + if (ret) { + dev_err(&pdev->dev, "failed to write IPROC_REGCTL2 %d\n", ret); + return ret; + } + + ret = devm_request_threaded_irq(&pdev->dev, adc_priv->irqno, + iproc_adc_interrupt_thread, + iproc_adc_interrupt_handler, + IRQF_SHARED, "iproc-adc", indio_dev); + if (ret) { + dev_err(&pdev->dev, "request_irq error %d\n", ret); + return ret; + } + + ret = clk_prepare_enable(adc_priv->adc_clk); + if (ret) { + dev_err(&pdev->dev, + "clk_prepare_enable failed %d\n", ret); + return ret; + } + + ret = iproc_adc_enable(indio_dev); + if (ret) { + dev_err(&pdev->dev, "failed to enable adc %d\n", ret); + goto err_adc_enable; + } + + indio_dev->name = "iproc-static-adc"; + indio_dev->dev.parent = &pdev->dev; + indio_dev->dev.of_node = pdev->dev.of_node; + indio_dev->info = &iproc_adc_iio_info; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->channels = iproc_adc_iio_channels; + indio_dev->num_channels = ARRAY_SIZE(iproc_adc_iio_channels); + + ret = iio_device_register(indio_dev); + if (ret) { + dev_err(&pdev->dev, "iio_device_register failed:err %d\n", ret); + goto err_clk; + } + + return 0; + +err_clk: + iproc_adc_disable(indio_dev); +err_adc_enable: + clk_disable_unprepare(adc_priv->adc_clk); + + return ret; +} + +static int iproc_adc_remove(struct platform_device *pdev) +{ + struct iio_dev *indio_dev = platform_get_drvdata(pdev); + struct iproc_adc_priv *adc_priv = iio_priv(indio_dev); + + iio_device_unregister(indio_dev); + iproc_adc_disable(indio_dev); + clk_disable_unprepare(adc_priv->adc_clk); + + return 0; +} + +static const struct of_device_id iproc_adc_of_match[] = { + {.compatible = "brcm,iproc-static-adc", }, + { }, +}; +MODULE_DEVICE_TABLE(of, iproc_adc_of_match); + +static struct platform_driver 
iproc_adc_driver = { + .probe = iproc_adc_probe, + .remove = iproc_adc_remove, + .driver = { + .name = "iproc-static-adc", + .of_match_table = of_match_ptr(iproc_adc_of_match), + }, +}; +module_platform_driver(iproc_adc_driver); + +MODULE_DESCRIPTION("Broadcom iProc ADC controller driver"); +MODULE_AUTHOR("Raveendra Padasalagi "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/adc/cc10001_adc.c b/drivers/iio/adc/cc10001_adc.c index 8254f529b2a9..91636c0ba5b5 100644 --- a/drivers/iio/adc/cc10001_adc.c +++ b/drivers/iio/adc/cc10001_adc.c @@ -186,7 +186,7 @@ done: if (!sample_invalid) iio_push_to_buffers_with_timestamp(indio_dev, data, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; diff --git a/drivers/iio/adc/hi8435.c b/drivers/iio/adc/hi8435.c index c73c6c62a6ac..678e8c7ea763 100644 --- a/drivers/iio/adc/hi8435.c +++ b/drivers/iio/adc/hi8435.c @@ -400,7 +400,7 @@ static void hi8435_iio_push_event(struct iio_dev *idev, unsigned int val) iio_push_event(idev, IIO_UNMOD_EVENT_CODE(IIO_VOLTAGE, i, IIO_EV_TYPE_THRESH, dir), - iio_get_time_ns()); + iio_get_time_ns(idev)); } } @@ -455,6 +455,7 @@ static int hi8435_probe(struct spi_device *spi) mutex_init(&priv->lock); idev->dev.parent = &spi->dev; + idev->dev.of_node = spi->dev.of_node; idev->name = spi_get_device_id(spi)->name; idev->modes = INDIO_DIRECT_MODE; idev->info = &hi8435_info; diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c index 502f2fbe8aef..955f3fdaf519 100644 --- a/drivers/iio/adc/ina2xx-adc.c +++ b/drivers/iio/adc/ina2xx-adc.c @@ -465,7 +465,7 @@ static int ina2xx_work_buffer(struct iio_dev *indio_dev) s64 time_a, time_b; unsigned int alert; - time_a = iio_get_time_ns(); + time_a = iio_get_time_ns(indio_dev); /* * Because the timer thread and the chip conversion clock @@ -504,7 +504,7 @@ static int ina2xx_work_buffer(struct iio_dev *indio_dev) data[i++] = val; } - time_b = iio_get_time_ns(); + time_b = iio_get_time_ns(indio_dev); iio_push_to_buffers_with_timestamp(indio_dev, (unsigned int *)data, time_a); @@ -554,7 +554,7 @@ static int ina2xx_buffer_enable(struct iio_dev *indio_dev) dev_dbg(&indio_dev->dev, "Async readout mode: %d\n", chip->allow_async_readout); - chip->prev_ns = iio_get_time_ns(); + chip->prev_ns = iio_get_time_ns(indio_dev); chip->task = kthread_run(ina2xx_capture_thread, (void *)indio_dev, "%s:%d-%uus", indio_dev->name, indio_dev->id, @@ -691,6 +691,7 @@ static int ina2xx_probe(struct i2c_client *client, indio_dev->modes = INDIO_DIRECT_MODE | INDIO_BUFFER_SOFTWARE; indio_dev->dev.parent = &client->dev; + indio_dev->dev.of_node = client->dev.of_node; indio_dev->channels = ina2xx_channels; indio_dev->num_channels = ARRAY_SIZE(ina2xx_channels); indio_dev->name = id->name; diff --git a/drivers/iio/adc/max1027.c b/drivers/iio/adc/max1027.c index 41d495c6035e..712fbd2b1f16 100644 --- a/drivers/iio/adc/max1027.c +++ b/drivers/iio/adc/max1027.c @@ -426,6 +426,7 @@ static int max1027_probe(struct spi_device *spi) indio_dev->name = spi_get_device_id(spi)->name; indio_dev->dev.parent = &spi->dev; + indio_dev->dev.of_node = spi->dev.of_node; indio_dev->info = &max1027_info; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = st->info->channels; diff --git a/drivers/iio/adc/max1363.c b/drivers/iio/adc/max1363.c index 998dc3caad4c..841a13c9b6ea 100644 --- a/drivers/iio/adc/max1363.c +++ b/drivers/iio/adc/max1363.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -788,7 +790,7 @@ static 
irqreturn_t max1363_event_handler(int irq, void *private) { struct iio_dev *indio_dev = private; struct max1363_state *st = iio_priv(indio_dev); - s64 timestamp = iio_get_time_ns(); + s64 timestamp = iio_get_time_ns(indio_dev); unsigned long mask, loc; u8 rx; u8 tx[2] = { st->setupbyte, @@ -1506,7 +1508,8 @@ static irqreturn_t max1363_trigger_handler(int irq, void *p) if (b_sent < 0) goto done_free; - iio_push_to_buffers_with_timestamp(indio_dev, rxbuf, iio_get_time_ns()); + iio_push_to_buffers_with_timestamp(indio_dev, rxbuf, + iio_get_time_ns(indio_dev)); done_free: kfree(rxbuf); @@ -1516,6 +1519,56 @@ done: return IRQ_HANDLED; } +#ifdef CONFIG_OF + +#define MAX1363_COMPATIBLE(of_compatible, cfg) { \ + .compatible = of_compatible, \ + .data = &max1363_chip_info_tbl[cfg], \ +} + +static const struct of_device_id max1363_of_match[] = { + MAX1363_COMPATIBLE("maxim,max1361", max1361), + MAX1363_COMPATIBLE("maxim,max1362", max1362), + MAX1363_COMPATIBLE("maxim,max1363", max1363), + MAX1363_COMPATIBLE("maxim,max1364", max1364), + MAX1363_COMPATIBLE("maxim,max1036", max1036), + MAX1363_COMPATIBLE("maxim,max1037", max1037), + MAX1363_COMPATIBLE("maxim,max1038", max1038), + MAX1363_COMPATIBLE("maxim,max1039", max1039), + MAX1363_COMPATIBLE("maxim,max1136", max1136), + MAX1363_COMPATIBLE("maxim,max1137", max1137), + MAX1363_COMPATIBLE("maxim,max1138", max1138), + MAX1363_COMPATIBLE("maxim,max1139", max1139), + MAX1363_COMPATIBLE("maxim,max1236", max1236), + MAX1363_COMPATIBLE("maxim,max1237", max1237), + MAX1363_COMPATIBLE("maxim,max1238", max1238), + MAX1363_COMPATIBLE("maxim,max1239", max1239), + MAX1363_COMPATIBLE("maxim,max11600", max11600), + MAX1363_COMPATIBLE("maxim,max11601", max11601), + MAX1363_COMPATIBLE("maxim,max11602", max11602), + MAX1363_COMPATIBLE("maxim,max11603", max11603), + MAX1363_COMPATIBLE("maxim,max11604", max11604), + MAX1363_COMPATIBLE("maxim,max11605", max11605), + MAX1363_COMPATIBLE("maxim,max11606", max11606), + MAX1363_COMPATIBLE("maxim,max11607", max11607), + MAX1363_COMPATIBLE("maxim,max11608", max11608), + MAX1363_COMPATIBLE("maxim,max11609", max11609), + MAX1363_COMPATIBLE("maxim,max11610", max11610), + MAX1363_COMPATIBLE("maxim,max11611", max11611), + MAX1363_COMPATIBLE("maxim,max11612", max11612), + MAX1363_COMPATIBLE("maxim,max11613", max11613), + MAX1363_COMPATIBLE("maxim,max11614", max11614), + MAX1363_COMPATIBLE("maxim,max11615", max11615), + MAX1363_COMPATIBLE("maxim,max11616", max11616), + MAX1363_COMPATIBLE("maxim,max11617", max11617), + MAX1363_COMPATIBLE("maxim,max11644", max11644), + MAX1363_COMPATIBLE("maxim,max11645", max11645), + MAX1363_COMPATIBLE("maxim,max11646", max11646), + MAX1363_COMPATIBLE("maxim,max11647", max11647), + { /* sentinel */ } +}; +#endif + static int max1363_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -1523,6 +1576,7 @@ static int max1363_probe(struct i2c_client *client, struct max1363_state *st; struct iio_dev *indio_dev; struct regulator *vref; + const struct of_device_id *match; indio_dev = devm_iio_device_alloc(&client->dev, sizeof(struct max1363_state)); @@ -1549,7 +1603,12 @@ static int max1363_probe(struct i2c_client *client, /* this is only used for device removal purposes */ i2c_set_clientdata(client, indio_dev); - st->chip_info = &max1363_chip_info_tbl[id->driver_data]; + match = of_match_device(of_match_ptr(max1363_of_match), + &client->dev); + if (match) + st->chip_info = of_device_get_match_data(&client->dev); + else + st->chip_info = &max1363_chip_info_tbl[id->driver_data]; st->client = 
client; st->vref_uv = st->chip_info->int_vref_mv * 1000; @@ -1587,6 +1646,7 @@ static int max1363_probe(struct i2c_client *client, /* Establish that the iio_dev is a child of the i2c device */ indio_dev->dev.parent = &client->dev; + indio_dev->dev.of_node = client->dev.of_node; indio_dev->name = id->name; indio_dev->channels = st->chip_info->channels; indio_dev->num_channels = st->chip_info->num_channels; @@ -1692,6 +1752,7 @@ MODULE_DEVICE_TABLE(i2c, max1363_id); static struct i2c_driver max1363_driver = { .driver = { .name = "max1363", + .of_match_table = of_match_ptr(max1363_of_match), }, .probe = max1363_probe, .remove = max1363_remove, diff --git a/drivers/iio/adc/mcp320x.c b/drivers/iio/adc/mcp320x.c index a850ca7d1eda..634717ae12f3 100644 --- a/drivers/iio/adc/mcp320x.c +++ b/drivers/iio/adc/mcp320x.c @@ -308,6 +308,7 @@ static int mcp320x_probe(struct spi_device *spi) adc->spi = spi; indio_dev->dev.parent = &spi->dev; + indio_dev->dev.of_node = spi->dev.of_node; indio_dev->name = spi_get_device_id(spi)->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &mcp320x_info; diff --git a/drivers/iio/adc/mcp3422.c b/drivers/iio/adc/mcp3422.c index d1172dc1e8e2..254135e07792 100644 --- a/drivers/iio/adc/mcp3422.c +++ b/drivers/iio/adc/mcp3422.c @@ -352,6 +352,7 @@ static int mcp3422_probe(struct i2c_client *client, mutex_init(&adc->lock); indio_dev->dev.parent = &client->dev; + indio_dev->dev.of_node = client->dev.of_node; indio_dev->name = dev_name(&client->dev); indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &mcp3422_info; diff --git a/drivers/iio/adc/mxs-lradc.c b/drivers/iio/adc/mxs-lradc.c index ad26da1edbee..b84d37c80a94 100644 --- a/drivers/iio/adc/mxs-lradc.c +++ b/drivers/iio/adc/mxs-lradc.c @@ -373,13 +373,6 @@ static u32 mxs_lradc_plate_mask(struct mxs_lradc *lradc) return LRADC_CTRL0_MX28_PLATE_MASK; } -static u32 mxs_lradc_irq_en_mask(struct mxs_lradc *lradc) -{ - if (lradc->soc == IMX23_LRADC) - return LRADC_CTRL1_MX23_LRADC_IRQ_EN_MASK; - return LRADC_CTRL1_MX28_LRADC_IRQ_EN_MASK; -} - static u32 mxs_lradc_irq_mask(struct mxs_lradc *lradc) { if (lradc->soc == IMX23_LRADC) @@ -1120,18 +1113,16 @@ static int mxs_lradc_ts_register(struct mxs_lradc *lradc) { struct input_dev *input; struct device *dev = lradc->dev; - int ret; if (!lradc->use_touchscreen) return 0; - input = input_allocate_device(); + input = devm_input_allocate_device(dev); if (!input) return -ENOMEM; input->name = DRIVER_NAME; input->id.bustype = BUS_HOST; - input->dev.parent = dev; input->open = mxs_lradc_ts_open; input->close = mxs_lradc_ts_close; @@ -1146,20 +1137,8 @@ static int mxs_lradc_ts_register(struct mxs_lradc *lradc) lradc->ts_input = input; input_set_drvdata(input, lradc); - ret = input_register_device(input); - if (ret) - input_free_device(lradc->ts_input); - - return ret; -} - -static void mxs_lradc_ts_unregister(struct mxs_lradc *lradc) -{ - if (!lradc->use_touchscreen) - return; - mxs_lradc_disable_ts(lradc); - input_unregister_device(lradc->ts_input); + return input_register_device(input); } /* @@ -1510,7 +1489,9 @@ static void mxs_lradc_hw_stop(struct mxs_lradc *lradc) { int i; - mxs_lradc_reg_clear(lradc, mxs_lradc_irq_en_mask(lradc), LRADC_CTRL1); + mxs_lradc_reg_clear(lradc, + lradc->buffer_vchans << LRADC_CTRL1_LRADC_IRQ_EN_OFFSET, + LRADC_CTRL1); for (i = 0; i < LRADC_MAX_DELAY_CHANS; i++) mxs_lradc_reg_wrt(lradc, 0, LRADC_DELAY(i)); @@ -1721,13 +1702,11 @@ static int mxs_lradc_probe(struct platform_device *pdev) ret = iio_device_register(iio); if (ret) { dev_err(dev, 
"Failed to register IIO device\n"); - goto err_ts; + return ret; } return 0; -err_ts: - mxs_lradc_ts_unregister(lradc); err_ts_register: mxs_lradc_hw_stop(lradc); err_dev: @@ -1745,7 +1724,6 @@ static int mxs_lradc_remove(struct platform_device *pdev) struct mxs_lradc *lradc = iio_priv(iio); iio_device_unregister(iio); - mxs_lradc_ts_unregister(lradc); mxs_lradc_hw_stop(lradc); mxs_lradc_trigger_remove(iio); iio_triggered_buffer_cleanup(iio); diff --git a/drivers/iio/adc/nau7802.c b/drivers/iio/adc/nau7802.c index e525aa6475c4..db9b829ccf0d 100644 --- a/drivers/iio/adc/nau7802.c +++ b/drivers/iio/adc/nau7802.c @@ -79,10 +79,29 @@ static const struct iio_chan_spec nau7802_chan_array[] = { static const u16 nau7802_sample_freq_avail[] = {10, 20, 40, 80, 10, 10, 10, 320}; +static ssize_t nau7802_show_scales(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nau7802_state *st = iio_priv(dev_to_iio_dev(dev)); + int i, len = 0; + + for (i = 0; i < ARRAY_SIZE(st->scale_avail); i++) + len += scnprintf(buf + len, PAGE_SIZE - len, "0.%09d ", + st->scale_avail[i]); + + buf[len-1] = '\n'; + + return len; +} + static IIO_CONST_ATTR_SAMP_FREQ_AVAIL("10 40 80 320"); +static IIO_DEVICE_ATTR(in_voltage_scale_available, S_IRUGO, nau7802_show_scales, + NULL, 0); + static struct attribute *nau7802_attributes[] = { &iio_const_attr_sampling_frequency_available.dev_attr.attr, + &iio_dev_attr_in_voltage_scale_available.dev_attr.attr, NULL }; @@ -414,6 +433,7 @@ static int nau7802_probe(struct i2c_client *client, i2c_set_clientdata(client, indio_dev); indio_dev->dev.parent = &client->dev; + indio_dev->dev.of_node = client->dev.of_node; indio_dev->name = dev_name(&client->dev); indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &nau7802_info; diff --git a/drivers/iio/adc/ti-adc081c.c b/drivers/iio/adc/ti-adc081c.c index 9fd032d9f402..319172cf7da8 100644 --- a/drivers/iio/adc/ti-adc081c.c +++ b/drivers/iio/adc/ti-adc081c.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -138,7 +139,8 @@ static irqreturn_t adc081c_trigger_handler(int irq, void *p) if (ret < 0) goto out; buf[0] = ret; - iio_push_to_buffers_with_timestamp(indio_dev, buf, iio_get_time_ns()); + iio_push_to_buffers_with_timestamp(indio_dev, buf, + iio_get_time_ns(indio_dev)); out: iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; @@ -149,12 +151,24 @@ static int adc081c_probe(struct i2c_client *client, { struct iio_dev *iio; struct adc081c *adc; - struct adcxx1c_model *model = &adcxx1c_models[id->driver_data]; + struct adcxx1c_model *model; int err; if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA)) return -EOPNOTSUPP; + if (ACPI_COMPANION(&client->dev)) { + const struct acpi_device_id *ad_id; + + ad_id = acpi_match_device(client->dev.driver->acpi_match_table, + &client->dev); + if (!ad_id) + return -ENODEV; + model = &adcxx1c_models[ad_id->driver_data]; + } else { + model = &adcxx1c_models[id->driver_data]; + } + iio = devm_iio_device_alloc(&client->dev, sizeof(*adc)); if (!iio) return -ENOMEM; @@ -172,6 +186,7 @@ static int adc081c_probe(struct i2c_client *client, return err; iio->dev.parent = &client->dev; + iio->dev.of_node = client->dev.of_node; iio->name = dev_name(&client->dev); iio->modes = INDIO_DIRECT_MODE; iio->info = &adc081c_info; @@ -231,10 +246,21 @@ static const struct of_device_id adc081c_of_match[] = { MODULE_DEVICE_TABLE(of, adc081c_of_match); #endif +#ifdef CONFIG_ACPI +static const struct acpi_device_id adc081c_acpi_match[] = { + { "ADC081C", 
ADC081C }, + { "ADC101C", ADC101C }, + { "ADC121C", ADC121C }, + { } +}; +MODULE_DEVICE_TABLE(acpi, adc081c_acpi_match); +#endif + static struct i2c_driver adc081c_driver = { .driver = { .name = "adc081c", .of_match_table = of_match_ptr(adc081c_of_match), + .acpi_match_table = ACPI_PTR(adc081c_acpi_match), }, .probe = adc081c_probe, .remove = adc081c_remove, diff --git a/drivers/iio/adc/ti-adc0832.c b/drivers/iio/adc/ti-adc0832.c index 0afeac0c9bad..f4ba23effe9a 100644 --- a/drivers/iio/adc/ti-adc0832.c +++ b/drivers/iio/adc/ti-adc0832.c @@ -194,6 +194,7 @@ static int adc0832_probe(struct spi_device *spi) indio_dev->name = spi_get_device_id(spi)->name; indio_dev->dev.parent = &spi->dev; + indio_dev->dev.of_node = spi->dev.of_node; indio_dev->info = &adc0832_info; indio_dev->modes = INDIO_DIRECT_MODE; diff --git a/drivers/iio/adc/ti-adc128s052.c b/drivers/iio/adc/ti-adc128s052.c index bc58867d6e8d..89dfbd31be5c 100644 --- a/drivers/iio/adc/ti-adc128s052.c +++ b/drivers/iio/adc/ti-adc128s052.c @@ -150,6 +150,7 @@ static int adc128_probe(struct spi_device *spi) spi_set_drvdata(spi, indio_dev); indio_dev->dev.parent = &spi->dev; + indio_dev->dev.of_node = spi->dev.of_node; indio_dev->name = spi_get_device_id(spi)->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &adc128_info; diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index 73cbf0b54e54..1ef398770a1f 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -55,6 +55,11 @@ #define ADS1015_DEFAULT_DATA_RATE 4 #define ADS1015_DEFAULT_CHAN 0 +enum { + ADS1015, + ADS1115, +}; + enum ads1015_channels { ADS1015_AIN0_AIN1 = 0, ADS1015_AIN0_AIN3, @@ -71,6 +76,10 @@ static const unsigned int ads1015_data_rate[] = { 128, 250, 490, 920, 1600, 2400, 3300, 3300 }; +static const unsigned int ads1115_data_rate[] = { + 8, 16, 32, 64, 128, 250, 475, 860 +}; + static const struct { int scale; int uscale; @@ -101,6 +110,7 @@ static const struct { .shift = 4, \ .endianness = IIO_CPU, \ }, \ + .datasheet_name = "AIN"#_chan, \ } #define ADS1015_V_DIFF_CHAN(_chan, _chan2, _addr) { \ @@ -121,6 +131,45 @@ static const struct { .shift = 4, \ .endianness = IIO_CPU, \ }, \ + .datasheet_name = "AIN"#_chan"-AIN"#_chan2, \ +} + +#define ADS1115_V_CHAN(_chan, _addr) { \ + .type = IIO_VOLTAGE, \ + .indexed = 1, \ + .address = _addr, \ + .channel = _chan, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .scan_index = _addr, \ + .scan_type = { \ + .sign = 's', \ + .realbits = 16, \ + .storagebits = 16, \ + .endianness = IIO_CPU, \ + }, \ + .datasheet_name = "AIN"#_chan, \ +} + +#define ADS1115_V_DIFF_CHAN(_chan, _chan2, _addr) { \ + .type = IIO_VOLTAGE, \ + .differential = 1, \ + .indexed = 1, \ + .address = _addr, \ + .channel = _chan, \ + .channel2 = _chan2, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .scan_index = _addr, \ + .scan_type = { \ + .sign = 's', \ + .realbits = 16, \ + .storagebits = 16, \ + .endianness = IIO_CPU, \ + }, \ + .datasheet_name = "AIN"#_chan"-AIN"#_chan2, \ } struct ads1015_data { @@ -131,6 +180,8 @@ struct ads1015_data { */ struct mutex lock; struct ads1015_channel_data channel_data[ADS1015_CHANNELS]; + + unsigned int *data_rate; }; static bool ads1015_is_writeable_reg(struct device *dev, unsigned int reg) @@ -157,6 +208,18 @@ static const struct iio_chan_spec ads1015_channels[] = { IIO_CHAN_SOFT_TIMESTAMP(ADS1015_TIMESTAMP), }; +static const 
struct iio_chan_spec ads1115_channels[] = { + ADS1115_V_DIFF_CHAN(0, 1, ADS1015_AIN0_AIN1), + ADS1115_V_DIFF_CHAN(0, 3, ADS1015_AIN0_AIN3), + ADS1115_V_DIFF_CHAN(1, 3, ADS1015_AIN1_AIN3), + ADS1115_V_DIFF_CHAN(2, 3, ADS1015_AIN2_AIN3), + ADS1115_V_CHAN(0, ADS1015_AIN0), + ADS1115_V_CHAN(1, ADS1015_AIN1), + ADS1115_V_CHAN(2, ADS1015_AIN2), + ADS1115_V_CHAN(3, ADS1015_AIN3), + IIO_CHAN_SOFT_TIMESTAMP(ADS1015_TIMESTAMP), +}; + static int ads1015_set_power_state(struct ads1015_data *data, bool on) { int ret; @@ -196,7 +259,7 @@ int ads1015_get_adc_result(struct ads1015_data *data, int chan, int *val) return ret; if (change) { - conv_time = DIV_ROUND_UP(USEC_PER_SEC, ads1015_data_rate[dr]); + conv_time = DIV_ROUND_UP(USEC_PER_SEC, data->data_rate[dr]); usleep_range(conv_time, conv_time + 1); } @@ -225,7 +288,8 @@ static irqreturn_t ads1015_trigger_handler(int irq, void *p) buf[0] = res; mutex_unlock(&data->lock); - iio_push_to_buffers_with_timestamp(indio_dev, buf, iio_get_time_ns()); + iio_push_to_buffers_with_timestamp(indio_dev, buf, + iio_get_time_ns(indio_dev)); err: iio_trigger_notify_done(indio_dev->trig); @@ -263,7 +327,7 @@ static int ads1015_set_data_rate(struct ads1015_data *data, int chan, int rate) int i, ret, rindex = -1; for (i = 0; i < ARRAY_SIZE(ads1015_data_rate); i++) - if (ads1015_data_rate[i] == rate) { + if (data->data_rate[i] == rate) { rindex = i; break; } @@ -291,7 +355,9 @@ static int ads1015_read_raw(struct iio_dev *indio_dev, mutex_lock(&indio_dev->mlock); mutex_lock(&data->lock); switch (mask) { - case IIO_CHAN_INFO_RAW: + case IIO_CHAN_INFO_RAW: { + int shift = chan->scan_type.shift; + if (iio_buffer_enabled(indio_dev)) { ret = -EBUSY; break; @@ -307,8 +373,7 @@ static int ads1015_read_raw(struct iio_dev *indio_dev, break; } - /* 12 bit res, D0 is bit 4 in conversion register */ - *val = sign_extend32(*val >> 4, 11); + *val = sign_extend32(*val >> shift, 15 - shift); ret = ads1015_set_power_state(data, false); if (ret < 0) @@ -316,6 +381,7 @@ static int ads1015_read_raw(struct iio_dev *indio_dev, ret = IIO_VAL_INT; break; + } case IIO_CHAN_INFO_SCALE: idx = data->channel_data[chan->address].pga; *val = ads1015_scale[idx].scale; @@ -324,7 +390,7 @@ static int ads1015_read_raw(struct iio_dev *indio_dev, break; case IIO_CHAN_INFO_SAMP_FREQ: idx = data->channel_data[chan->address].data_rate; - *val = ads1015_data_rate[idx]; + *val = data->data_rate[idx]; ret = IIO_VAL_INT; break; default: @@ -380,12 +446,15 @@ static const struct iio_buffer_setup_ops ads1015_buffer_setup_ops = { }; static IIO_CONST_ATTR(scale_available, "3 2 1 0.5 0.25 0.125"); -static IIO_CONST_ATTR(sampling_frequency_available, - "128 250 490 920 1600 2400 3300"); + +static IIO_CONST_ATTR_NAMED(ads1015_sampling_frequency_available, + sampling_frequency_available, "128 250 490 920 1600 2400 3300"); +static IIO_CONST_ATTR_NAMED(ads1115_sampling_frequency_available, + sampling_frequency_available, "8 16 32 64 128 250 475 860"); static struct attribute *ads1015_attributes[] = { &iio_const_attr_scale_available.dev_attr.attr, - &iio_const_attr_sampling_frequency_available.dev_attr.attr, + &iio_const_attr_ads1015_sampling_frequency_available.dev_attr.attr, NULL, }; @@ -393,11 +462,28 @@ static const struct attribute_group ads1015_attribute_group = { .attrs = ads1015_attributes, }; -static const struct iio_info ads1015_info = { +static struct attribute *ads1115_attributes[] = { + &iio_const_attr_scale_available.dev_attr.attr, + &iio_const_attr_ads1115_sampling_frequency_available.dev_attr.attr, + NULL, +}; + 
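The ads1015_read_raw() change above derives the sign extension from chan->scan_type.shift instead of hard-coding the 12-bit case, so one path serves both the ADS1015 (12-bit result left-aligned in bits 15:4 of the conversion register) and the ADS1115 (full 16-bit result). A small sketch of what sign_extend32(*val >> shift, 15 - shift) computes, with the kernel helper re-implemented for userspace and made-up register values:

    #include <stdint.h>
    #include <stdio.h>

    /* Userspace re-implementation of the kernel's sign_extend32() helper. */
    static int32_t sign_extend32(uint32_t value, int index)
    {
            int shift = 31 - index;

            return (int32_t)(value << shift) >> shift;
    }

    int main(void)
    {
            uint32_t reg;
            int shift;

            /* ADS1015: 12-bit code 0x801 sits in bits 15:4, so shift = 4. */
            reg = 0x8010;
            shift = 4;
            printf("ads1015: %d\n", sign_extend32(reg >> shift, 15 - shift)); /* -2047 */

            /* ADS1115: full 16-bit code, so shift = 0. */
            reg = 0x8001;
            shift = 0;
            printf("ads1115: %d\n", sign_extend32(reg >> shift, 15 - shift)); /* -32767 */

            return 0;
    }

Taking shift from scan_type means 15 - shift always names the extracted sample's sign-bit index, whatever the chip's resolution.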
+static const struct attribute_group ads1115_attribute_group = { + .attrs = ads1115_attributes, +}; + +static struct iio_info ads1015_info = { + .driver_module = THIS_MODULE, + .read_raw = ads1015_read_raw, + .write_raw = ads1015_write_raw, + .attrs = &ads1015_attribute_group, +}; + +static struct iio_info ads1115_info = { .driver_module = THIS_MODULE, .read_raw = ads1015_read_raw, .write_raw = ads1015_write_raw, - .attrs = &ads1015_attribute_group, + .attrs = &ads1115_attribute_group, }; #ifdef CONFIG_OF @@ -500,12 +586,25 @@ static int ads1015_probe(struct i2c_client *client, mutex_init(&data->lock); indio_dev->dev.parent = &client->dev; - indio_dev->info = &ads1015_info; + indio_dev->dev.of_node = client->dev.of_node; indio_dev->name = ADS1015_DRV_NAME; - indio_dev->channels = ads1015_channels; - indio_dev->num_channels = ARRAY_SIZE(ads1015_channels); indio_dev->modes = INDIO_DIRECT_MODE; + switch (id->driver_data) { + case ADS1015: + indio_dev->channels = ads1015_channels; + indio_dev->num_channels = ARRAY_SIZE(ads1015_channels); + indio_dev->info = &ads1015_info; + data->data_rate = (unsigned int *) &ads1015_data_rate; + break; + case ADS1115: + indio_dev->channels = ads1115_channels; + indio_dev->num_channels = ARRAY_SIZE(ads1115_channels); + indio_dev->info = &ads1115_info; + data->data_rate = (unsigned int *) &ads1115_data_rate; + break; + } + /* we need to keep this ABI the same as used by hwmon ADS1015 driver */ ads1015_get_channels_config(client); @@ -590,7 +689,8 @@ static const struct dev_pm_ops ads1015_pm_ops = { }; static const struct i2c_device_id ads1015_id[] = { - {"ads1015", 0}, + {"ads1015", ADS1015}, + {"ads1115", ADS1115}, {} }; MODULE_DEVICE_TABLE(i2c, ads1015_id); diff --git a/drivers/iio/adc/ti-ads8688.c b/drivers/iio/adc/ti-ads8688.c index 03e907028cb6..c400439900af 100644 --- a/drivers/iio/adc/ti-ads8688.c +++ b/drivers/iio/adc/ti-ads8688.c @@ -421,6 +421,7 @@ static int ads8688_probe(struct spi_device *spi) indio_dev->name = spi_get_device_id(spi)->name; indio_dev->dev.parent = &spi->dev; + indio_dev->dev.of_node = spi->dev.of_node; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = st->chip_info->channels; indio_dev->num_channels = st->chip_info->num_channels; diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index c1e05532d437..8a368756881b 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -326,8 +326,7 @@ static int tiadc_channel_init(struct iio_dev *indio_dev, int channels) int i; indio_dev->num_channels = channels; - chan_array = kcalloc(channels, - sizeof(struct iio_chan_spec), GFP_KERNEL); + chan_array = kcalloc(channels, sizeof(*chan_array), GFP_KERNEL); if (chan_array == NULL) return -ENOMEM; @@ -467,8 +466,7 @@ static int tiadc_probe(struct platform_device *pdev) return -EINVAL; } - indio_dev = devm_iio_device_alloc(&pdev->dev, - sizeof(struct tiadc_device)); + indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*indio_dev)); if (indio_dev == NULL) { dev_err(&pdev->dev, "failed to allocate iio device\n"); return -ENOMEM; @@ -531,8 +529,7 @@ static int tiadc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int tiadc_suspend(struct device *dev) +static int __maybe_unused tiadc_suspend(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); struct tiadc_device *adc_dev = iio_priv(indio_dev); @@ -550,7 +547,7 @@ static int tiadc_suspend(struct device *dev) return 0; } -static int tiadc_resume(struct device *dev) +static int __maybe_unused 
tiadc_resume(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); struct tiadc_device *adc_dev = iio_priv(indio_dev); @@ -567,14 +564,7 @@ static int tiadc_resume(struct device *dev) return 0; } -static const struct dev_pm_ops tiadc_pm_ops = { - .suspend = tiadc_suspend, - .resume = tiadc_resume, -}; -#define TIADC_PM_OPS (&tiadc_pm_ops) -#else -#define TIADC_PM_OPS NULL -#endif +static SIMPLE_DEV_PM_OPS(tiadc_pm_ops, tiadc_suspend, tiadc_resume); static const struct of_device_id ti_adc_dt_ids[] = { { .compatible = "ti,am3359-adc", }, @@ -585,7 +575,7 @@ MODULE_DEVICE_TABLE(of, ti_adc_dt_ids); static struct platform_driver tiadc_driver = { .driver = { .name = "TI-am335x-adc", - .pm = TIADC_PM_OPS, + .pm = &tiadc_pm_ops, .of_match_table = ti_adc_dt_ids, }, .probe = tiadc_probe, diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c index 653bf1379d2e..228a003adeed 100644 --- a/drivers/iio/adc/vf610_adc.c +++ b/drivers/iio/adc/vf610_adc.c @@ -594,7 +594,8 @@ static irqreturn_t vf610_adc_isr(int irq, void *dev_id) if (iio_buffer_enabled(indio_dev)) { info->buffer[0] = info->value; iio_push_to_buffers_with_timestamp(indio_dev, - info->buffer, iio_get_time_ns()); + info->buffer, + iio_get_time_ns(indio_dev)); iio_trigger_notify_done(indio_dev->trig); } else complete(&info->completion); diff --git a/drivers/iio/adc/xilinx-xadc-events.c b/drivers/iio/adc/xilinx-xadc-events.c index edcf3aabd70d..6d5c2a6f4e6e 100644 --- a/drivers/iio/adc/xilinx-xadc-events.c +++ b/drivers/iio/adc/xilinx-xadc-events.c @@ -46,7 +46,7 @@ static void xadc_handle_event(struct iio_dev *indio_dev, unsigned int event) iio_push_event(indio_dev, IIO_UNMOD_EVENT_CODE(chan->type, chan->channel, IIO_EV_TYPE_THRESH, IIO_EV_DIR_RISING), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); } else { /* * For other channels we don't know whether it is a upper or @@ -56,7 +56,7 @@ static void xadc_handle_event(struct iio_dev *indio_dev, unsigned int event) iio_push_event(indio_dev, IIO_UNMOD_EVENT_CODE(chan->type, chan->channel, IIO_EV_TYPE_THRESH, IIO_EV_DIR_EITHER), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); } } diff --git a/drivers/iio/buffer/industrialio-buffer-dma.c b/drivers/iio/buffer/industrialio-buffer-dma.c index 212cbedc7abb..dd99d273bae9 100644 --- a/drivers/iio/buffer/industrialio-buffer-dma.c +++ b/drivers/iio/buffer/industrialio-buffer-dma.c @@ -305,7 +305,7 @@ int iio_dma_buffer_request_update(struct iio_buffer *buffer) queue->fileio.active_block = NULL; spin_lock_irq(&queue->list_lock); - for (i = 0; i < 2; i++) { + for (i = 0; i < ARRAY_SIZE(queue->fileio.blocks); i++) { block = queue->fileio.blocks[i]; /* If we can't re-use it free it */ @@ -323,7 +323,7 @@ int iio_dma_buffer_request_update(struct iio_buffer *buffer) INIT_LIST_HEAD(&queue->incoming); - for (i = 0; i < 2; i++) { + for (i = 0; i < ARRAY_SIZE(queue->fileio.blocks); i++) { if (queue->fileio.blocks[i]) { block = queue->fileio.blocks[i]; if (block->state == IIO_BLOCK_STATE_DEAD) { diff --git a/drivers/iio/chemical/Kconfig b/drivers/iio/chemical/Kconfig index f73290f84c90..4bcc025e8c8a 100644 --- a/drivers/iio/chemical/Kconfig +++ b/drivers/iio/chemical/Kconfig @@ -5,15 +5,17 @@ menu "Chemical Sensors" config ATLAS_PH_SENSOR - tristate "Atlas Scientific OEM pH-SM sensor" + tristate "Atlas Scientific OEM SM sensors" depends on I2C select REGMAP_I2C select IIO_BUFFER select IIO_TRIGGERED_BUFFER select IRQ_WORK help - Say Y here to build I2C interface support for the Atlas - Scientific OEM pH-SM sensor. 
+ Say Y here to build I2C interface support for the following + Atlas Scientific OEM SM sensors: + * pH SM sensor + * EC SM sensor To compile this driver as module, choose M here: the module will be called atlas-ph-sensor. diff --git a/drivers/iio/chemical/atlas-ph-sensor.c b/drivers/iio/chemical/atlas-ph-sensor.c index 62b37cd8fb56..ae038a59d256 100644 --- a/drivers/iio/chemical/atlas-ph-sensor.c +++ b/drivers/iio/chemical/atlas-ph-sensor.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -43,29 +44,50 @@ #define ATLAS_REG_PWR_CONTROL 0x06 -#define ATLAS_REG_CALIB_STATUS 0x0d -#define ATLAS_REG_CALIB_STATUS_MASK 0x07 -#define ATLAS_REG_CALIB_STATUS_LOW BIT(0) -#define ATLAS_REG_CALIB_STATUS_MID BIT(1) -#define ATLAS_REG_CALIB_STATUS_HIGH BIT(2) +#define ATLAS_REG_PH_CALIB_STATUS 0x0d +#define ATLAS_REG_PH_CALIB_STATUS_MASK 0x07 +#define ATLAS_REG_PH_CALIB_STATUS_LOW BIT(0) +#define ATLAS_REG_PH_CALIB_STATUS_MID BIT(1) +#define ATLAS_REG_PH_CALIB_STATUS_HIGH BIT(2) -#define ATLAS_REG_TEMP_DATA 0x0e +#define ATLAS_REG_EC_CALIB_STATUS 0x0f +#define ATLAS_REG_EC_CALIB_STATUS_MASK 0x0f +#define ATLAS_REG_EC_CALIB_STATUS_DRY BIT(0) +#define ATLAS_REG_EC_CALIB_STATUS_SINGLE BIT(1) +#define ATLAS_REG_EC_CALIB_STATUS_LOW BIT(2) +#define ATLAS_REG_EC_CALIB_STATUS_HIGH BIT(3) + +#define ATLAS_REG_PH_TEMP_DATA 0x0e #define ATLAS_REG_PH_DATA 0x16 +#define ATLAS_REG_EC_PROBE 0x08 +#define ATLAS_REG_EC_TEMP_DATA 0x10 +#define ATLAS_REG_EC_DATA 0x18 +#define ATLAS_REG_TDS_DATA 0x1c +#define ATLAS_REG_PSS_DATA 0x20 + #define ATLAS_PH_INT_TIME_IN_US 450000 +#define ATLAS_EC_INT_TIME_IN_US 650000 + +enum { + ATLAS_PH_SM, + ATLAS_EC_SM, +}; struct atlas_data { struct i2c_client *client; struct iio_trigger *trig; + struct atlas_device *chip; struct regmap *regmap; struct irq_work work; - __be32 buffer[4]; /* 32-bit pH data + 32-bit pad + 64-bit timestamp */ + __be32 buffer[6]; /* 96-bit data + 32-bit pad + 64-bit timestamp */ }; static const struct regmap_range atlas_volatile_ranges[] = { regmap_reg_range(ATLAS_REG_INT_CONTROL, ATLAS_REG_INT_CONTROL), regmap_reg_range(ATLAS_REG_PH_DATA, ATLAS_REG_PH_DATA + 4), + regmap_reg_range(ATLAS_REG_EC_DATA, ATLAS_REG_PSS_DATA + 4), }; static const struct regmap_access_table atlas_volatile_table = { @@ -80,13 +102,14 @@ static const struct regmap_config atlas_regmap_config = { .val_bits = 8, .volatile_table = &atlas_volatile_table, - .max_register = ATLAS_REG_PH_DATA + 4, + .max_register = ATLAS_REG_PSS_DATA + 4, .cache_type = REGCACHE_RBTREE, }; -static const struct iio_chan_spec atlas_channels[] = { +static const struct iio_chan_spec atlas_ph_channels[] = { { .type = IIO_PH, + .address = ATLAS_REG_PH_DATA, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), .scan_index = 0, @@ -100,7 +123,7 @@ static const struct iio_chan_spec atlas_channels[] = { IIO_CHAN_SOFT_TIMESTAMP(1), { .type = IIO_TEMP, - .address = ATLAS_REG_TEMP_DATA, + .address = ATLAS_REG_PH_TEMP_DATA, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), .output = 1, @@ -108,6 +131,142 @@ static const struct iio_chan_spec atlas_channels[] = { }, }; +#define ATLAS_EC_CHANNEL(_idx, _addr) \ + {\ + .type = IIO_CONCENTRATION, \ + .indexed = 1, \ + .channel = _idx, \ + .address = _addr, \ + .info_mask_separate = \ + BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), \ + .scan_index = _idx + 1, \ + .scan_type = { \ + .sign = 'u', \ + .realbits = 32, \ + .storagebits = 32, \ + .endianness = IIO_BE, \ + }, \ + } + +static const struct 
iio_chan_spec atlas_ec_channels[] = { + { + .type = IIO_ELECTRICALCONDUCTIVITY, + .address = ATLAS_REG_EC_DATA, + .info_mask_separate = + BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), + .scan_index = 0, + .scan_type = { + .sign = 'u', + .realbits = 32, + .storagebits = 32, + .endianness = IIO_BE, + }, + }, + ATLAS_EC_CHANNEL(0, ATLAS_REG_TDS_DATA), + ATLAS_EC_CHANNEL(1, ATLAS_REG_PSS_DATA), + IIO_CHAN_SOFT_TIMESTAMP(3), + { + .type = IIO_TEMP, + .address = ATLAS_REG_EC_TEMP_DATA, + .info_mask_separate = + BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), + .output = 1, + .scan_index = -1 + }, +}; + +static int atlas_check_ph_calibration(struct atlas_data *data) +{ + struct device *dev = &data->client->dev; + int ret; + unsigned int val; + + ret = regmap_read(data->regmap, ATLAS_REG_PH_CALIB_STATUS, &val); + if (ret) + return ret; + + if (!(val & ATLAS_REG_PH_CALIB_STATUS_MASK)) { + dev_warn(dev, "device has not been calibrated\n"); + return 0; + } + + if (!(val & ATLAS_REG_PH_CALIB_STATUS_LOW)) + dev_warn(dev, "device missing low point calibration\n"); + + if (!(val & ATLAS_REG_PH_CALIB_STATUS_MID)) + dev_warn(dev, "device missing mid point calibration\n"); + + if (!(val & ATLAS_REG_PH_CALIB_STATUS_HIGH)) + dev_warn(dev, "device missing high point calibration\n"); + + return 0; +} + +static int atlas_check_ec_calibration(struct atlas_data *data) +{ + struct device *dev = &data->client->dev; + int ret; + unsigned int val; + + ret = regmap_bulk_read(data->regmap, ATLAS_REG_EC_PROBE, &val, 2); + if (ret) + return ret; + + dev_info(dev, "probe set to K = %d.%.2d", be16_to_cpu(val) / 100, + be16_to_cpu(val) % 100); + + ret = regmap_read(data->regmap, ATLAS_REG_EC_CALIB_STATUS, &val); + if (ret) + return ret; + + if (!(val & ATLAS_REG_EC_CALIB_STATUS_MASK)) { + dev_warn(dev, "device has not been calibrated\n"); + return 0; + } + + if (!(val & ATLAS_REG_EC_CALIB_STATUS_DRY)) + dev_warn(dev, "device missing dry point calibration\n"); + + if (val & ATLAS_REG_EC_CALIB_STATUS_SINGLE) { + dev_warn(dev, "device using single point calibration\n"); + } else { + if (!(val & ATLAS_REG_EC_CALIB_STATUS_LOW)) + dev_warn(dev, "device missing low point calibration\n"); + + if (!(val & ATLAS_REG_EC_CALIB_STATUS_HIGH)) + dev_warn(dev, "device missing high point calibration\n"); + } + + return 0; +} + +struct atlas_device { + const struct iio_chan_spec *channels; + int num_channels; + int data_reg; + + int (*calibration)(struct atlas_data *data); + int delay; +}; + +static struct atlas_device atlas_devices[] = { + [ATLAS_PH_SM] = { + .channels = atlas_ph_channels, + .num_channels = 3, + .data_reg = ATLAS_REG_PH_DATA, + .calibration = &atlas_check_ph_calibration, + .delay = ATLAS_PH_INT_TIME_IN_US, + }, + [ATLAS_EC_SM] = { + .channels = atlas_ec_channels, + .num_channels = 5, + .data_reg = ATLAS_REG_EC_DATA, + .calibration = &atlas_check_ec_calibration, + .delay = ATLAS_EC_INT_TIME_IN_US, + }, + +}; + static int atlas_set_powermode(struct atlas_data *data, int on) { return regmap_write(data->regmap, ATLAS_REG_PWR_CONTROL, on); @@ -178,12 +337,13 @@ static irqreturn_t atlas_trigger_handler(int irq, void *private) struct atlas_data *data = iio_priv(indio_dev); int ret; - ret = regmap_bulk_read(data->regmap, ATLAS_REG_PH_DATA, - (u8 *) &data->buffer, sizeof(data->buffer[0])); + ret = regmap_bulk_read(data->regmap, data->chip->data_reg, + (u8 *) &data->buffer, + sizeof(__be32) * (data->chip->num_channels - 2)); if (!ret) iio_push_to_buffers_with_timestamp(indio_dev, data->buffer, - iio_get_time_ns()); + 
iio_get_time_ns(indio_dev)); iio_trigger_notify_done(indio_dev->trig); @@ -200,7 +360,7 @@ static irqreturn_t atlas_interrupt_handler(int irq, void *private) return IRQ_HANDLED; } -static int atlas_read_ph_measurement(struct atlas_data *data, __be32 *val) +static int atlas_read_measurement(struct atlas_data *data, int reg, __be32 *val) { struct device *dev = &data->client->dev; int suspended = pm_runtime_suspended(dev); @@ -213,11 +373,9 @@ static int atlas_read_ph_measurement(struct atlas_data *data, __be32 *val) } if (suspended) - usleep_range(ATLAS_PH_INT_TIME_IN_US, - ATLAS_PH_INT_TIME_IN_US + 100000); + usleep_range(data->chip->delay, data->chip->delay + 100000); - ret = regmap_bulk_read(data->regmap, ATLAS_REG_PH_DATA, - (u8 *) val, sizeof(*val)); + ret = regmap_bulk_read(data->regmap, reg, (u8 *) val, sizeof(*val)); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); @@ -242,12 +400,15 @@ static int atlas_read_raw(struct iio_dev *indio_dev, (u8 *) &reg, sizeof(reg)); break; case IIO_PH: + case IIO_CONCENTRATION: + case IIO_ELECTRICALCONDUCTIVITY: mutex_lock(&indio_dev->mlock); if (iio_buffer_enabled(indio_dev)) ret = -EBUSY; else - ret = atlas_read_ph_measurement(data, &reg); + ret = atlas_read_measurement(data, + chan->address, &reg); mutex_unlock(&indio_dev->mlock); break; @@ -271,6 +432,14 @@ static int atlas_read_raw(struct iio_dev *indio_dev, *val = 1; /* 0.001 */ *val2 = 1000; break; + case IIO_ELECTRICALCONDUCTIVITY: + *val = 1; /* 0.00001 */ + *val2 = 100000; + break; + case IIO_CONCENTRATION: + *val = 0; /* 0.000000001 */ + *val2 = 1000; + return IIO_VAL_INT_PLUS_NANO; default: return -EINVAL; } @@ -303,37 +472,26 @@ static const struct iio_info atlas_info = { .write_raw = atlas_write_raw, }; -static int atlas_check_calibration(struct atlas_data *data) -{ - struct device *dev = &data->client->dev; - int ret; - unsigned int val; - - ret = regmap_read(data->regmap, ATLAS_REG_CALIB_STATUS, &val); - if (ret) - return ret; - - if (!(val & ATLAS_REG_CALIB_STATUS_MASK)) { - dev_warn(dev, "device has not been calibrated\n"); - return 0; - } - - if (!(val & ATLAS_REG_CALIB_STATUS_LOW)) - dev_warn(dev, "device missing low point calibration\n"); - - if (!(val & ATLAS_REG_CALIB_STATUS_MID)) - dev_warn(dev, "device missing mid point calibration\n"); - - if (!(val & ATLAS_REG_CALIB_STATUS_HIGH)) - dev_warn(dev, "device missing high point calibration\n"); +static const struct i2c_device_id atlas_id[] = { + { "atlas-ph-sm", ATLAS_PH_SM}, + { "atlas-ec-sm", ATLAS_EC_SM}, + {} +}; +MODULE_DEVICE_TABLE(i2c, atlas_id); - return 0; +static const struct of_device_id atlas_dt_ids[] = { + { .compatible = "atlas,ph-sm", .data = (void *)ATLAS_PH_SM, }, + { .compatible = "atlas,ec-sm", .data = (void *)ATLAS_EC_SM, }, + { } }; +MODULE_DEVICE_TABLE(of, atlas_dt_ids); static int atlas_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct atlas_data *data; + struct atlas_device *chip; + const struct of_device_id *of_id; struct iio_trigger *trig; struct iio_dev *indio_dev; int ret; @@ -342,10 +500,16 @@ static int atlas_probe(struct i2c_client *client, if (!indio_dev) return -ENOMEM; + of_id = of_match_device(atlas_dt_ids, &client->dev); + if (!of_id) + chip = &atlas_devices[id->driver_data]; + else + chip = &atlas_devices[(unsigned long)of_id->data]; + indio_dev->info = &atlas_info; indio_dev->name = ATLAS_DRV_NAME; - indio_dev->channels = atlas_channels; - indio_dev->num_channels = ARRAY_SIZE(atlas_channels); + indio_dev->channels = chip->channels; + indio_dev->num_channels =
chip->num_channels; indio_dev->modes = INDIO_BUFFER_SOFTWARE | INDIO_DIRECT_MODE; indio_dev->dev.parent = &client->dev; @@ -358,6 +522,7 @@ static int atlas_probe(struct i2c_client *client, data = iio_priv(indio_dev); data->client = client; data->trig = trig; + data->chip = chip; trig->dev.parent = indio_dev->dev.parent; trig->ops = &atlas_interrupt_trigger_ops; iio_trigger_set_drvdata(trig, indio_dev); @@ -379,7 +544,7 @@ static int atlas_probe(struct i2c_client *client, return -EINVAL; } - ret = atlas_check_calibration(data); + ret = chip->calibration(data); if (ret) return ret; @@ -480,18 +645,6 @@ static const struct dev_pm_ops atlas_pm_ops = { atlas_runtime_resume, NULL) }; -static const struct i2c_device_id atlas_id[] = { - { "atlas-ph-sm", 0 }, - {} -}; -MODULE_DEVICE_TABLE(i2c, atlas_id); - -static const struct of_device_id atlas_dt_ids[] = { - { .compatible = "atlas,ph-sm" }, - { } -}; -MODULE_DEVICE_TABLE(of, atlas_dt_ids); - static struct i2c_driver atlas_driver = { .driver = { .name = ATLAS_DRV_NAME, diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c index f1693dbebb8a..d06e728cea37 100644 --- a/drivers/iio/common/st_sensors/st_sensors_buffer.c +++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c @@ -22,34 +22,32 @@ #include -int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf) +static int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf) { - int i, len; - int total = 0; + int i; struct st_sensor_data *sdata = iio_priv(indio_dev); unsigned int num_data_channels = sdata->num_data_channels; - for (i = 0; i < num_data_channels; i++) { - unsigned int bytes_to_read; - - if (test_bit(i, indio_dev->active_scan_mask)) { - bytes_to_read = indio_dev->channels[i].scan_type.storagebits >> 3; - len = sdata->tf->read_multiple_byte(&sdata->tb, - sdata->dev, indio_dev->channels[i].address, - bytes_to_read, - buf + total, sdata->multiread_bit); - - if (len < bytes_to_read) - return -EIO; - - /* Advance the buffer pointer */ - total += len; - } + for_each_set_bit(i, indio_dev->active_scan_mask, num_data_channels) { + const struct iio_chan_spec *channel = &indio_dev->channels[i]; + unsigned int bytes_to_read = channel->scan_type.realbits >> 3; + unsigned int storage_bytes = + channel->scan_type.storagebits >> 3; + + buf = PTR_ALIGN(buf, storage_bytes); + if (sdata->tf->read_multiple_byte(&sdata->tb, sdata->dev, + channel->address, + bytes_to_read, buf, + sdata->multiread_bit) < + bytes_to_read) + return -EIO; + + /* Advance the buffer pointer */ + buf += storage_bytes; } - return total; + return 0; } -EXPORT_SYMBOL(st_sensors_get_buffer_element); irqreturn_t st_sensors_trigger_handler(int irq, void *p) { @@ -59,11 +57,16 @@ irqreturn_t st_sensors_trigger_handler(int irq, void *p) struct st_sensor_data *sdata = iio_priv(indio_dev); s64 timestamp; - /* If we do timetamping here, do it before reading the values */ + /* + * If we do timestamping here, do it before reading the values, because + * once we've read the values, new interrupts can occur (when using + * the hardware trigger) and the hw_timestamp may get updated. + * By storing it in a local variable first, we are safe.
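+	 * (Illustration only, not code from this patch; the interleaving
+	 *  being avoided is:
+	 *    timestamp = sdata->hw_timestamp;      <- t0 captured
+	 *    ...new IRQ fires, hw_timestamp = t1...
+	 *    st_sensors_get_buffer_element(...);   <- data still pairs with t0.)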
+ */ if (sdata->hw_irq_trigger) timestamp = sdata->hw_timestamp; else - timestamp = iio_get_time_ns(); + timestamp = iio_get_time_ns(indio_dev); len = st_sensors_get_buffer_element(indio_dev, sdata->buffer_data); if (len < 0) diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 9e59c90f6a8d..2d5282e05482 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -228,7 +228,7 @@ int st_sensors_set_axis_enable(struct iio_dev *indio_dev, u8 axis_enable) } EXPORT_SYMBOL(st_sensors_set_axis_enable); -void st_sensors_power_enable(struct iio_dev *indio_dev) +int st_sensors_power_enable(struct iio_dev *indio_dev) { struct st_sensor_data *pdata = iio_priv(indio_dev); int err; @@ -237,18 +237,37 @@ void st_sensors_power_enable(struct iio_dev *indio_dev) pdata->vdd = devm_regulator_get_optional(indio_dev->dev.parent, "vdd"); if (!IS_ERR(pdata->vdd)) { err = regulator_enable(pdata->vdd); - if (err != 0) + if (err != 0) { dev_warn(&indio_dev->dev, "Failed to enable specified Vdd supply\n"); + return err; + } + } else { + err = PTR_ERR(pdata->vdd); + if (err != -ENODEV) + return err; } pdata->vdd_io = devm_regulator_get_optional(indio_dev->dev.parent, "vddio"); if (!IS_ERR(pdata->vdd_io)) { err = regulator_enable(pdata->vdd_io); - if (err != 0) + if (err != 0) { dev_warn(&indio_dev->dev, "Failed to enable specified Vdd_IO supply\n"); + goto st_sensors_disable_vdd; + } + } else { + err = PTR_ERR(pdata->vdd_io); + if (err != -ENODEV) + goto st_sensors_disable_vdd; } + + return 0; + +st_sensors_disable_vdd: + if (!IS_ERR_OR_NULL(pdata->vdd)) + regulator_disable(pdata->vdd); + return err; } EXPORT_SYMBOL(st_sensors_power_enable); @@ -256,10 +275,10 @@ void st_sensors_power_disable(struct iio_dev *indio_dev) { struct st_sensor_data *pdata = iio_priv(indio_dev); - if (!IS_ERR(pdata->vdd)) + if (!IS_ERR_OR_NULL(pdata->vdd)) regulator_disable(pdata->vdd); - if (!IS_ERR(pdata->vdd_io)) + if (!IS_ERR_OR_NULL(pdata->vdd_io)) regulator_disable(pdata->vdd_io); } EXPORT_SYMBOL(st_sensors_power_disable); @@ -471,7 +490,7 @@ static int st_sensors_read_axis_data(struct iio_dev *indio_dev, int err; u8 *outdata; struct st_sensor_data *sdata = iio_priv(indio_dev); - unsigned int byte_for_channel = ch->scan_type.storagebits >> 3; + unsigned int byte_for_channel = ch->scan_type.realbits >> 3; outdata = kmalloc(byte_for_channel, GFP_KERNEL); if (!outdata) @@ -531,7 +550,7 @@ int st_sensors_check_device_support(struct iio_dev *indio_dev, int num_sensors_list, const struct st_sensor_settings *sensor_settings) { - int i, n, err; + int i, n, err = 0; u8 wai; struct st_sensor_data *sdata = iio_priv(indio_dev); @@ -551,17 +570,21 @@ int st_sensors_check_device_support(struct iio_dev *indio_dev, return -ENODEV; } - err = sdata->tf->read_byte(&sdata->tb, sdata->dev, - sensor_settings[i].wai_addr, &wai); - if (err < 0) { - dev_err(&indio_dev->dev, "failed to read Who-Am-I register.\n"); - return err; - } + if (sensor_settings[i].wai_addr) { + err = sdata->tf->read_byte(&sdata->tb, sdata->dev, + sensor_settings[i].wai_addr, &wai); + if (err < 0) { + dev_err(&indio_dev->dev, + "failed to read Who-Am-I register.\n"); + return err; + } - if (sensor_settings[i].wai != wai) { - dev_err(&indio_dev->dev, "%s: WhoAmI mismatch (0x%x).\n", - indio_dev->name, wai); - return -EINVAL; + if (sensor_settings[i].wai != wai) { + dev_err(&indio_dev->dev, + "%s: WhoAmI mismatch (0x%x).\n", + indio_dev->name, wai); + return -EINVAL; 
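+			/*
+			 * (A wai_addr of zero is taken to mean the part has
+			 * no Who-Am-I register at all; such sensors are
+			 * accepted on the strength of the name match alone.)
+			 */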
+ } } sdata->sensor_settings = diff --git a/drivers/iio/common/st_sensors/st_sensors_i2c.c b/drivers/iio/common/st_sensors/st_sensors_i2c.c index 98cfee296d46..b43aa36031f8 100644 --- a/drivers/iio/common/st_sensors/st_sensors_i2c.c +++ b/drivers/iio/common/st_sensors/st_sensors_i2c.c @@ -48,8 +48,8 @@ static int st_sensors_i2c_read_multiple_byte( if (multiread_bit) reg_addr |= ST_SENSORS_I2C_MULTIREAD; - return i2c_smbus_read_i2c_block_data(to_i2c_client(dev), - reg_addr, len, data); + return i2c_smbus_read_i2c_block_data_or_emulated(to_i2c_client(dev), + reg_addr, len, data); } static int st_sensors_i2c_write_byte(struct st_sensor_transfer_buffer *tb, diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c index 296e4ff19ae8..e66f12ee8a55 100644 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c @@ -17,6 +17,50 @@ #include #include "st_sensors_core.h" +/** + * st_sensors_new_samples_available() - check if more samples came in + * returns: + * 0 - no new samples available + * 1 - new samples available + * negative - error or unknown + */ +static int st_sensors_new_samples_available(struct iio_dev *indio_dev, + struct st_sensor_data *sdata) +{ + u8 status; + int ret; + + /* How would I know if I can't check it? */ + if (!sdata->sensor_settings->drdy_irq.addr_stat_drdy) + return -EINVAL; + + /* No scan mask, no interrupt */ + if (!indio_dev->active_scan_mask) + return 0; + + ret = sdata->tf->read_byte(&sdata->tb, sdata->dev, + sdata->sensor_settings->drdy_irq.addr_stat_drdy, + &status); + if (ret < 0) { + dev_err(sdata->dev, + "error checking samples available\n"); + return ret; + } + /* + * the lower bits of .active_scan_mask[0] are directly mapped + * to the channels on the sensor: either bit 0 for + * one-dimensional sensors, or e.g. x,y,z for accelerometers, + * gyroscopes or magnetometers. No sensor uses more than 3 + * channels, so cut the other status bits here. + */ + status &= 0x07; + + if (status & (u8)indio_dev->active_scan_mask[0]) + return 1; + + return 0; +} + /** * st_sensors_irq_handler() - top half of the IRQ-based triggers * @irq: irq number @@ -29,7 +73,7 @@ irqreturn_t st_sensors_irq_handler(int irq, void *p) struct st_sensor_data *sdata = iio_priv(indio_dev); /* Get the time stamp as close in time as possible */ - sdata->hw_timestamp = iio_get_time_ns(); + sdata->hw_timestamp = iio_get_time_ns(indio_dev); return IRQ_WAKE_THREAD; } @@ -43,44 +87,43 @@ irqreturn_t st_sensors_irq_thread(int irq, void *p) struct iio_trigger *trig = p; struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); struct st_sensor_data *sdata = iio_priv(indio_dev); - int ret; /* * If this trigger is backed by a hardware interrupt and we have a - * status register, check if this IRQ came from us + * status register, check if this IRQ came from us. Notice that + * we will also process if st_sensors_new_samples_available() + * returns negative: if we can't check status, then poll + * unconditionally. */ - if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) { - u8 status; - - ret = sdata->tf->read_byte(&sdata->tb, sdata->dev, - sdata->sensor_settings->drdy_irq.addr_stat_drdy, - &status); - if (ret < 0) { - dev_err(sdata->dev, "could not read channel status\n"); - goto out_poll; - } - /* - * the lower bits of .active_scan_mask[0] is directly mapped - * to the channels on the sensor: either bit 0 for - * one-dimensional sensors, or e.g.
x,y,z for accelerometers, - * gyroscopes or magnetometers. No sensor use more than 3 - * channels, so cut the other status bits here. - */ - status &= 0x07; + if (sdata->hw_irq_trigger && + st_sensors_new_samples_available(indio_dev, sdata)) { + iio_trigger_poll_chained(p); + } else { + dev_dbg(sdata->dev, "spurious IRQ\n"); + return IRQ_NONE; + } - /* - * If this was not caused by any channels on this sensor, - * return IRQ_NONE - */ - if (!indio_dev->active_scan_mask) - return IRQ_NONE; - if (!(status & (u8)indio_dev->active_scan_mask[0])) - return IRQ_NONE; + /* + * If we have proper level IRQs the handler will be re-entered if + * the line is still active, so return here and come back in through + * the top half if need be. + */ + if (!sdata->edge_irq) + return IRQ_HANDLED; + + /* + * If we are using edge IRQs, new samples arrived while processing + * the IRQ and those may be missed unless we pick them here, so poll + * again. If the sensor delivery frequency is very high, this thread + * turns into a polled loop handler. + */ + while (sdata->hw_irq_trigger && + st_sensors_new_samples_available(indio_dev, sdata)) { + dev_dbg(sdata->dev, "more samples came in during polling\n"); + sdata->hw_timestamp = iio_get_time_ns(indio_dev); + iio_trigger_poll_chained(p); } -out_poll: - /* It's our IRQ: proceed to handle the register polling */ - iio_trigger_poll_chained(p); return IRQ_HANDLED; } @@ -107,13 +150,18 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, * If the IRQ is triggered on falling edge, we need to mark the * interrupt as active low, if the hardware supports this. */ - if (irq_trig == IRQF_TRIGGER_FALLING) { + switch (irq_trig) { + case IRQF_TRIGGER_FALLING: + case IRQF_TRIGGER_LOW: if (!sdata->sensor_settings->drdy_irq.addr_ihl) { dev_err(&indio_dev->dev, - "falling edge specified for IRQ but hardware " - "only support rising edge, will request " - "rising edge\n"); - irq_trig = IRQF_TRIGGER_RISING; + "falling/low specified for IRQ " + "but hardware only supports rising/high: " + "will request rising/high\n"); + if (irq_trig == IRQF_TRIGGER_FALLING) + irq_trig = IRQF_TRIGGER_RISING; + if (irq_trig == IRQF_TRIGGER_LOW) + irq_trig = IRQF_TRIGGER_HIGH; } else { /* Set up INT active low i.e. falling edge */ err = st_sensors_write_data_with_mask(indio_dev, @@ -122,20 +170,39 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, if (err < 0) goto iio_trigger_free; dev_info(&indio_dev->dev, - "interrupts on the falling edge\n"); + "interrupts on the falling edge or " + "active low level\n"); } - } else if (irq_trig == IRQF_TRIGGER_RISING) { + break; + case IRQF_TRIGGER_RISING: dev_info(&indio_dev->dev, "interrupts on the rising edge\n"); - - } else { + break; + case IRQF_TRIGGER_HIGH: + dev_info(&indio_dev->dev, + "interrupts active high level\n"); + break; + default: + /* This is the most preferred mode, if possible */ dev_err(&indio_dev->dev, - "unsupported IRQ trigger specified (%lx), only " - "rising and falling edges supported, enforce " + "unsupported IRQ trigger specified (%lx), enforce " "rising edge\n", irq_trig); irq_trig = IRQF_TRIGGER_RISING; } + /* Tell the interrupt handler that we're dealing with edges */ + if (irq_trig == IRQF_TRIGGER_FALLING || + irq_trig == IRQF_TRIGGER_RISING) + sdata->edge_irq = true; + else + /* + * If we're not using edges (i.e. level interrupts) we + * just mask off the IRQ, handle one interrupt, then + * if the line is still low, we return to the + * interrupt handler top half again and start over.
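+	 * In short: edge IRQs are drained by the re-polling loop in the
+	 * thread above, while level IRQs rely on IRQF_ONESHOT plus the
+	 * still-asserted line re-entering the top half.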
+ */ + irq_trig |= IRQF_ONESHOT; + /* * If the interrupt pin is Open Drain, by definition this * means that the interrupt line may be shared with other @@ -148,9 +215,6 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, sdata->sensor_settings->drdy_irq.addr_stat_drdy) irq_trig |= IRQF_SHARED; - /* Let's create an interrupt thread masking the hard IRQ here */ - irq_trig |= IRQF_ONESHOT; - err = request_threaded_irq(sdata->get_irq_data_ready(indio_dev), st_sensors_irq_handler, st_sensors_irq_thread, diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig index f7c71da42f15..ca814479fadf 100644 --- a/drivers/iio/dac/Kconfig +++ b/drivers/iio/dac/Kconfig @@ -248,11 +248,12 @@ config MCP4922 config STX104 tristate "Apex Embedded Systems STX104 DAC driver" depends on X86 && ISA_BUS_API + select GPIOLIB help - Say yes here to build support for the 2-channel DAC on the Apex - Embedded Systems STX104 integrated analog PC/104 card. The base port - addresses for the devices may be configured via the "base" module - parameter array. + Say yes here to build support for the 2-channel DAC and GPIO on the + Apex Embedded Systems STX104 integrated analog PC/104 card. The base + port addresses for the devices may be configured via the base array + module parameter. config VF610_DAC tristate "Vybrid vf610 DAC driver" diff --git a/drivers/iio/dac/ad5421.c b/drivers/iio/dac/ad5421.c index 968712be967f..559061ab1982 100644 --- a/drivers/iio/dac/ad5421.c +++ b/drivers/iio/dac/ad5421.c @@ -242,7 +242,7 @@ static irqreturn_t ad5421_fault_handler(int irq, void *data) 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_RISING), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); } if (events & AD5421_FAULT_UNDER_CURRENT) { @@ -251,7 +251,7 @@ static irqreturn_t ad5421_fault_handler(int irq, void *data) 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_FALLING), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); } if (events & AD5421_FAULT_TEMP_OVER_140) { @@ -260,7 +260,7 @@ static irqreturn_t ad5421_fault_handler(int irq, void *data) 0, IIO_EV_TYPE_MAG, IIO_EV_DIR_RISING), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); } old_fault = fault; diff --git a/drivers/iio/dac/ad5504.c b/drivers/iio/dac/ad5504.c index 4e4c20d6d8b5..788b3d6fd1cc 100644 --- a/drivers/iio/dac/ad5504.c +++ b/drivers/iio/dac/ad5504.c @@ -223,7 +223,7 @@ static irqreturn_t ad5504_event_handler(int irq, void *private) 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_RISING), - iio_get_time_ns()); + iio_get_time_ns((struct iio_dev *)private)); return IRQ_HANDLED; } diff --git a/drivers/iio/dac/ad5755.c b/drivers/iio/dac/ad5755.c index bfb350a85a16..0fde593ec0d9 100644 --- a/drivers/iio/dac/ad5755.c +++ b/drivers/iio/dac/ad5755.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -109,6 +110,51 @@ enum ad5755_type { ID_AD5737, }; +#ifdef CONFIG_OF +static const int ad5755_dcdc_freq_table[][2] = { + { 250000, AD5755_DC_DC_FREQ_250kHZ }, + { 410000, AD5755_DC_DC_FREQ_410kHZ }, + { 650000, AD5755_DC_DC_FREQ_650kHZ } +}; + +static const int ad5755_dcdc_maxv_table[][2] = { + { 23000000, AD5755_DC_DC_MAXV_23V }, + { 24500000, AD5755_DC_DC_MAXV_24V5 }, + { 27000000, AD5755_DC_DC_MAXV_27V }, + { 29500000, AD5755_DC_DC_MAXV_29V5 }, +}; + +static const int ad5755_slew_rate_table[][2] = { + { 64000, AD5755_SLEW_RATE_64k }, + { 32000, AD5755_SLEW_RATE_32k }, + { 16000, AD5755_SLEW_RATE_16k }, + { 8000, AD5755_SLEW_RATE_8k }, + { 4000, AD5755_SLEW_RATE_4k }, + { 2000, AD5755_SLEW_RATE_2k }, + { 1000, AD5755_SLEW_RATE_1k }, + { 500, 
AD5755_SLEW_RATE_500 }, + { 250, AD5755_SLEW_RATE_250 }, + { 125, AD5755_SLEW_RATE_125 }, + { 64, AD5755_SLEW_RATE_64 }, + { 32, AD5755_SLEW_RATE_32 }, + { 16, AD5755_SLEW_RATE_16 }, + { 8, AD5755_SLEW_RATE_8 }, + { 4, AD5755_SLEW_RATE_4 }, + { 0, AD5755_SLEW_RATE_0_5 }, +}; + +static const int ad5755_slew_step_table[][2] = { + { 256, AD5755_SLEW_STEP_SIZE_256 }, + { 128, AD5755_SLEW_STEP_SIZE_128 }, + { 64, AD5755_SLEW_STEP_SIZE_64 }, + { 32, AD5755_SLEW_STEP_SIZE_32 }, + { 16, AD5755_SLEW_STEP_SIZE_16 }, + { 4, AD5755_SLEW_STEP_SIZE_4 }, + { 2, AD5755_SLEW_STEP_SIZE_2 }, + { 1, AD5755_SLEW_STEP_SIZE_1 }, +}; +#endif + static int ad5755_write_unlocked(struct iio_dev *indio_dev, unsigned int reg, unsigned int val) { @@ -556,6 +602,129 @@ static const struct ad5755_platform_data ad5755_default_pdata = { }, }; +#ifdef CONFIG_OF +static struct ad5755_platform_data *ad5755_parse_dt(struct device *dev) +{ + struct device_node *np = dev->of_node; + struct device_node *pp; + struct ad5755_platform_data *pdata; + unsigned int tmp; + unsigned int tmparray[3]; + int devnr, i; + + pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return NULL; + + pdata->ext_dc_dc_compenstation_resistor = + of_property_read_bool(np, "adi,ext-dc-dc-compenstation-resistor"); + + if (!of_property_read_u32(np, "adi,dc-dc-phase", &tmp)) + pdata->dc_dc_phase = tmp; + else + pdata->dc_dc_phase = AD5755_DC_DC_PHASE_ALL_SAME_EDGE; + + pdata->dc_dc_freq = AD5755_DC_DC_FREQ_410kHZ; + if (!of_property_read_u32(np, "adi,dc-dc-freq-hz", &tmp)) { + for (i = 0; i < ARRAY_SIZE(ad5755_dcdc_freq_table); i++) { + if (tmp == ad5755_dcdc_freq_table[i][0]) { + pdata->dc_dc_freq = ad5755_dcdc_freq_table[i][1]; + break; + } + } + + if (i == ARRAY_SIZE(ad5755_dcdc_freq_table)) { + dev_err(dev, + "adi,dc-dc-freq-hz out of range, selecting 410kHz"); + } + } + + pdata->dc_dc_maxv = AD5755_DC_DC_MAXV_23V; + if (!of_property_read_u32(np, "adi,dc-dc-max-microvolt", &tmp)) { + for (i = 0; i < ARRAY_SIZE(ad5755_dcdc_maxv_table); i++) { + if (tmp == ad5755_dcdc_maxv_table[i][0]) { + pdata->dc_dc_maxv = ad5755_dcdc_maxv_table[i][1]; + break; + } + } + if (i == ARRAY_SIZE(ad5755_dcdc_maxv_table)) { + dev_err(dev, + "adi,dc-dc-max-microvolt out of range, selecting 23V"); + } + } + + devnr = 0; + for_each_child_of_node(np, pp) { + if (devnr >= AD5755_NUM_CHANNELS) { + dev_err(dev, + "There are too many channels defined in DT\n"); + goto error_out; + } + + if (!of_property_read_u32(pp, "adi,mode", &tmp)) + pdata->dac[devnr].mode = tmp; + else + pdata->dac[devnr].mode = AD5755_MODE_CURRENT_4mA_20mA; + + pdata->dac[devnr].ext_current_sense_resistor = + of_property_read_bool(pp, "adi,ext-current-sense-resistor"); + + pdata->dac[devnr].enable_voltage_overrange = + of_property_read_bool(pp, "adi,enable-voltage-overrange"); + + if (!of_property_read_u32_array(pp, "adi,slew", tmparray, 3)) { + pdata->dac[devnr].slew.enable = tmparray[0]; + + pdata->dac[devnr].slew.rate = AD5755_SLEW_RATE_64k; + for (i = 0; i < ARRAY_SIZE(ad5755_slew_rate_table); i++) { + if (tmparray[1] == ad5755_slew_rate_table[i][0]) { + pdata->dac[devnr].slew.rate = + ad5755_slew_rate_table[i][1]; + break; + } + } + if (i == ARRAY_SIZE(ad5755_slew_rate_table)) { + dev_err(dev, + "channel %d slew rate out of range selecting 64kHz", + devnr); + } + + pdata->dac[devnr].slew.step_size = AD5755_SLEW_STEP_SIZE_1; + for (i = 0; i < ARRAY_SIZE(ad5755_slew_step_table); i++) { + if (tmparray[2] == ad5755_slew_step_table[i][0]) { + pdata->dac[devnr].slew.step_size = + ad5755_slew_step_table[i][1]; + 
break; + } + } + if (i == ARRAY_SIZE(ad5755_slew_step_table)) { + dev_err(dev, + "channel %d slew step size out of range selecting 1 LSB", + devnr); + } + } else { + pdata->dac[devnr].slew.enable = false; + pdata->dac[devnr].slew.rate = AD5755_SLEW_RATE_64k; + pdata->dac[devnr].slew.step_size = + AD5755_SLEW_STEP_SIZE_1; + } + devnr++; + } + + return pdata; + + error_out: + devm_kfree(dev, pdata); + return NULL; +} +#else +static +struct ad5755_platform_data *ad5755_parse_dt(struct device *dev) +{ + return NULL; +} +#endif + static int ad5755_probe(struct spi_device *spi) { enum ad5755_type type = spi_get_device_id(spi)->driver_data; @@ -583,8 +752,15 @@ static int ad5755_probe(struct spi_device *spi) indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->num_channels = AD5755_NUM_CHANNELS; - if (!pdata) + if (spi->dev.of_node) + pdata = ad5755_parse_dt(&spi->dev); + else + pdata = spi->dev.platform_data; + + if (!pdata) { + dev_warn(&spi->dev, "no platform data? using default\n"); pdata = &ad5755_default_pdata; + } ret = ad5755_init_channels(indio_dev, pdata); if (ret) @@ -607,6 +783,16 @@ static const struct spi_device_id ad5755_id[] = { }; MODULE_DEVICE_TABLE(spi, ad5755_id); +static const struct of_device_id ad5755_of_match[] = { + { .compatible = "adi,ad5755" }, + { .compatible = "adi,ad5755-1" }, + { .compatible = "adi,ad5757" }, + { .compatible = "adi,ad5735" }, + { .compatible = "adi,ad5737" }, + { } +}; +MODULE_DEVICE_TABLE(of, ad5755_of_match); + static struct spi_driver ad5755_driver = { .driver = { .name = "ad5755", diff --git a/drivers/iio/dac/stx104.c b/drivers/iio/dac/stx104.c index 27941220872f..792a97164cb2 100644 --- a/drivers/iio/dac/stx104.c +++ b/drivers/iio/dac/stx104.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,7 @@ #include #include #include +#include #define STX104_NUM_CHAN 2 @@ -49,6 +51,20 @@ struct stx104_iio { unsigned base; }; +/** + * struct stx104_gpio - GPIO device private data structure + * @chip: instance of the gpio_chip + * @lock: synchronization lock to prevent I/O race conditions + * @base: base port address of the GPIO device + * @out_state: output bits state + */ +struct stx104_gpio { + struct gpio_chip chip; + spinlock_t lock; + unsigned int base; + unsigned int out_state; +}; + static int stx104_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long mask) { @@ -88,15 +104,81 @@ static const struct iio_chan_spec stx104_channels[STX104_NUM_CHAN] = { STX104_CHAN(1) }; +static int stx104_gpio_get_direction(struct gpio_chip *chip, + unsigned int offset) +{ + if (offset < 4) + return 1; + + return 0; +} + +static int stx104_gpio_direction_input(struct gpio_chip *chip, + unsigned int offset) +{ + if (offset >= 4) + return -EINVAL; + + return 0; +} + +static int stx104_gpio_direction_output(struct gpio_chip *chip, + unsigned int offset, int value) +{ + if (offset < 4) + return -EINVAL; + + chip->set(chip, offset, value); + return 0; +} + +static int stx104_gpio_get(struct gpio_chip *chip, unsigned int offset) +{ + struct stx104_gpio *const stx104gpio = gpiochip_get_data(chip); + + if (offset >= 4) + return -EINVAL; + + return !!(inb(stx104gpio->base) & BIT(offset)); +} + +static void stx104_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) +{ + struct stx104_gpio *const stx104gpio = gpiochip_get_data(chip); + const unsigned int mask = BIT(offset) >> 4; + unsigned long flags; + + if (offset < 4) + return; + + spin_lock_irqsave(&stx104gpio->lock, flags); 
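+	/*
+	 * out_state shadows the output lines: the data register appears
+	 * write-only from this driver (get() only reads the four input
+	 * lines), so the last written value is kept in software and
+	 * updated read-modify-write under the lock.
+	 */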
+ + if (value) + stx104gpio->out_state |= mask; + else + stx104gpio->out_state &= ~mask; + + outb(stx104gpio->out_state, stx104gpio->base); + + spin_unlock_irqrestore(&stx104gpio->lock, flags); +} + static int stx104_probe(struct device *dev, unsigned int id) { struct iio_dev *indio_dev; struct stx104_iio *priv; + struct stx104_gpio *stx104gpio; + int err; indio_dev = devm_iio_device_alloc(dev, sizeof(*priv)); if (!indio_dev) return -ENOMEM; + stx104gpio = devm_kzalloc(dev, sizeof(*stx104gpio), GFP_KERNEL); + if (!stx104gpio) + return -ENOMEM; + if (!devm_request_region(dev, base[id], STX104_EXTENT, dev_name(dev))) { dev_err(dev, "Unable to lock port addresses (0x%X-0x%X)\n", @@ -117,14 +199,53 @@ static int stx104_probe(struct device *dev, unsigned int id) outw(0, base[id] + 4); outw(0, base[id] + 6); - return devm_iio_device_register(dev, indio_dev); + err = devm_iio_device_register(dev, indio_dev); + if (err) { + dev_err(dev, "IIO device registering failed (%d)\n", err); + return err; + } + + stx104gpio->chip.label = dev_name(dev); + stx104gpio->chip.parent = dev; + stx104gpio->chip.owner = THIS_MODULE; + stx104gpio->chip.base = -1; + stx104gpio->chip.ngpio = 8; + stx104gpio->chip.get_direction = stx104_gpio_get_direction; + stx104gpio->chip.direction_input = stx104_gpio_direction_input; + stx104gpio->chip.direction_output = stx104_gpio_direction_output; + stx104gpio->chip.get = stx104_gpio_get; + stx104gpio->chip.set = stx104_gpio_set; + stx104gpio->base = base[id] + 3; + stx104gpio->out_state = 0x0; + + spin_lock_init(&stx104gpio->lock); + + dev_set_drvdata(dev, stx104gpio); + + err = gpiochip_add_data(&stx104gpio->chip, stx104gpio); + if (err) { + dev_err(dev, "GPIO registering failed (%d)\n", err); + return err; + } + + return 0; +} + +static int stx104_remove(struct device *dev, unsigned int id) +{ + struct stx104_gpio *const stx104gpio = dev_get_drvdata(dev); + + gpiochip_remove(&stx104gpio->chip); + + return 0; } static struct isa_driver stx104_driver = { .probe = stx104_probe, .driver = { .name = "stx104" - } + }, + .remove = stx104_remove }; module_isa_driver(stx104_driver, num_stx104); diff --git a/drivers/iio/dummy/Kconfig b/drivers/iio/dummy/Kconfig index 71805ced1aae..aa5824d96a43 100644 --- a/drivers/iio/dummy/Kconfig +++ b/drivers/iio/dummy/Kconfig @@ -10,6 +10,7 @@ config IIO_DUMMY_EVGEN config IIO_SIMPLE_DUMMY tristate "An example driver with no hardware requirements" + depends on IIO_SW_DEVICE help Driver intended mainly as documentation for how to write a driver. May also be useful for testing userspace code diff --git a/drivers/iio/dummy/iio_simple_dummy.c b/drivers/iio/dummy/iio_simple_dummy.c index 43fe4ba7d0dc..ad3410e528b6 100644 --- a/drivers/iio/dummy/iio_simple_dummy.c +++ b/drivers/iio/dummy/iio_simple_dummy.c @@ -17,26 +17,18 @@ #include #include #include +#include #include #include #include #include +#include #include "iio_simple_dummy.h" -/* - * A few elements needed to fake a bus for this driver - * Note instances parameter controls how many of these - * dummy devices are registered. 
- */ -static unsigned instances = 1; -module_param(instances, uint, 0); - -/* Pointer array used to fake bus elements */ -static struct iio_dev **iio_dummy_devs; - -/* Fake a name for the part number, usually obtained from the id table */ -static const char *iio_dummy_part_number = "iio_dummy_part_no"; +static struct config_item_type iio_dummy_type = { + .ct_owner = THIS_MODULE, +}; /** * struct iio_dummy_accel_calibscale - realworld to register mapping @@ -572,12 +564,18 @@ static int iio_dummy_init_device(struct iio_dev *indio_dev) * const struct i2c_device_id *id) * SPI: iio_dummy_probe(struct spi_device *spi) */ -static int iio_dummy_probe(int index) +static struct iio_sw_device *iio_dummy_probe(const char *name) { int ret; struct iio_dev *indio_dev; struct iio_dummy_state *st; + struct iio_sw_device *swd; + swd = kzalloc(sizeof(*swd), GFP_KERNEL); + if (!swd) { + ret = -ENOMEM; + goto error_kzalloc; + } /* * Allocate an IIO device. * @@ -608,7 +606,7 @@ static int iio_dummy_probe(int index) * i2c_set_clientdata(client, indio_dev); * spi_set_drvdata(spi, indio_dev); */ - iio_dummy_devs[index] = indio_dev; + swd->device = indio_dev; /* * Set the device name. @@ -619,7 +617,7 @@ static int iio_dummy_probe(int index) * indio_dev->name = id->name; * indio_dev->name = spi_get_device_id(spi)->name; */ - indio_dev->name = iio_dummy_part_number; + indio_dev->name = kstrdup(name, GFP_KERNEL); /* Provide description of available channels */ indio_dev->channels = iio_dummy_channels; @@ -646,7 +644,9 @@ static int iio_dummy_probe(int index) if (ret < 0) goto error_unconfigure_buffer; - return 0; + iio_swd_group_init_type_name(swd, name, &iio_dummy_type); + + return swd; error_unconfigure_buffer: iio_simple_dummy_unconfigure_buffer(indio_dev); error_unregister_events: @@ -654,16 +654,18 @@ error_unregister_events: error_free_device: iio_device_free(indio_dev); error_ret: - return ret; + kfree(swd); +error_kzalloc: + return ERR_PTR(ret); } /** * iio_dummy_remove() - device instance removal function - * @index: device index. + * @swd: pointer to software IIO device abstraction * * Parameters follow those of iio_dummy_probe for buses. */ -static void iio_dummy_remove(int index) +static int iio_dummy_remove(struct iio_sw_device *swd) { /* * Get a pointer to the device instance iio_dev structure @@ -671,7 +673,7 @@ static void iio_dummy_remove(int index) * struct iio_dev *indio_dev = i2c_get_clientdata(client); * struct iio_dev *indio_dev = spi_get_drvdata(spi); */ - struct iio_dev *indio_dev = iio_dummy_devs[index]; + struct iio_dev *indio_dev = swd->device; /* Unregister the device */ iio_device_unregister(indio_dev); @@ -684,11 +686,13 @@ static void iio_dummy_remove(int index) iio_simple_dummy_events_unregister(indio_dev); /* Free all structures */ + kfree(indio_dev->name); iio_device_free(indio_dev); -} + return 0; +} /** - * iio_dummy_init() - device driver registration + * module_iio_sw_device_driver() - device driver registration * * Varies depending on bus type of the device. As there is no device * here, call probe directly. 
For information on device registration @@ -697,50 +701,18 @@ static void iio_dummy_remove(int index) * spi: * Documentation/spi/spi-summary */ -static __init int iio_dummy_init(void) -{ - int i, ret; - - if (instances > 10) { - instances = 1; - return -EINVAL; - } - - /* Fake a bus */ - iio_dummy_devs = kcalloc(instances, sizeof(*iio_dummy_devs), - GFP_KERNEL); - /* Here we have no actual device so call probe */ - for (i = 0; i < instances; i++) { - ret = iio_dummy_probe(i); - if (ret < 0) - goto error_remove_devs; - } - return 0; - -error_remove_devs: - while (i--) - iio_dummy_remove(i); - - kfree(iio_dummy_devs); - return ret; -} -module_init(iio_dummy_init); +static const struct iio_sw_device_ops iio_dummy_device_ops = { + .probe = iio_dummy_probe, + .remove = iio_dummy_remove, +}; -/** - * iio_dummy_exit() - device driver removal - * - * Varies depending on bus type of the device. - * As there is no device here, call remove directly. - */ -static __exit void iio_dummy_exit(void) -{ - int i; +static struct iio_sw_device_type iio_dummy_device = { + .name = "dummy", + .owner = THIS_MODULE, + .ops = &iio_dummy_device_ops, +}; - for (i = 0; i < instances; i++) - iio_dummy_remove(i); - kfree(iio_dummy_devs); -} -module_exit(iio_dummy_exit); +module_iio_sw_device_driver(iio_dummy_device); MODULE_AUTHOR("Jonathan Cameron "); MODULE_DESCRIPTION("IIO dummy driver"); diff --git a/drivers/iio/dummy/iio_simple_dummy_buffer.c b/drivers/iio/dummy/iio_simple_dummy_buffer.c index cf44a6f79431..b383892a5193 100644 --- a/drivers/iio/dummy/iio_simple_dummy_buffer.c +++ b/drivers/iio/dummy/iio_simple_dummy_buffer.c @@ -85,7 +85,8 @@ static irqreturn_t iio_simple_dummy_trigger_h(int irq, void *p) } } - iio_push_to_buffers_with_timestamp(indio_dev, data, iio_get_time_ns()); + iio_push_to_buffers_with_timestamp(indio_dev, data, + iio_get_time_ns(indio_dev)); kfree(data); diff --git a/drivers/iio/dummy/iio_simple_dummy_events.c b/drivers/iio/dummy/iio_simple_dummy_events.c index 6eb600ff7056..ed63ffd849f8 100644 --- a/drivers/iio/dummy/iio_simple_dummy_events.c +++ b/drivers/iio/dummy/iio_simple_dummy_events.c @@ -158,7 +158,7 @@ static irqreturn_t iio_simple_dummy_get_timestamp(int irq, void *private) struct iio_dev *indio_dev = private; struct iio_dummy_state *st = iio_priv(indio_dev); - st->event_timestamp = iio_get_time_ns(); + st->event_timestamp = iio_get_time_ns(indio_dev); return IRQ_WAKE_THREAD; } diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index 7ccc044063f6..f7fcfa886f72 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -50,6 +50,7 @@ #define BMG160_REG_PMU_BW 0x10 #define BMG160_NO_FILTER 0 #define BMG160_DEF_BW 100 +#define BMG160_REG_PMU_BW_RES BIT(7) #define BMG160_REG_INT_MAP_0 0x17 #define BMG160_INT_MAP_0_BIT_ANY BIT(1) @@ -100,7 +101,6 @@ struct bmg160_data { struct iio_trigger *motion_trig; struct mutex mutex; s16 buffer[8]; - u8 bw_bits; u32 dps_range; int ev_enable_state; int slope_thres; @@ -117,13 +117,16 @@ enum bmg160_axis { }; static const struct { - int val; + int odr; + int filter; int bw_bits; -} bmg160_samp_freq_table[] = { {100, 0x07}, - {200, 0x06}, - {400, 0x03}, - {1000, 0x02}, - {2000, 0x01} }; +} bmg160_samp_freq_table[] = { {100, 32, 0x07}, + {200, 64, 0x06}, + {100, 12, 0x05}, + {200, 23, 0x04}, + {400, 47, 0x03}, + {1000, 116, 0x02}, + {2000, 230, 0x01} }; static const struct { int scale; @@ -153,7 +156,7 @@ static int bmg160_convert_freq_to_bit(int val) int i; for (i = 0; i < 
ARRAY_SIZE(bmg160_samp_freq_table); ++i) { - if (bmg160_samp_freq_table[i].val == val) + if (bmg160_samp_freq_table[i].odr == val) return bmg160_samp_freq_table[i].bw_bits; } @@ -176,7 +179,53 @@ static int bmg160_set_bw(struct bmg160_data *data, int val) return ret; } - data->bw_bits = bw_bits; + return 0; +} + +static int bmg160_get_filter(struct bmg160_data *data, int *val) +{ + struct device *dev = regmap_get_device(data->regmap); + int ret; + int i; + unsigned int bw_bits; + + ret = regmap_read(data->regmap, BMG160_REG_PMU_BW, &bw_bits); + if (ret < 0) { + dev_err(dev, "Error reading reg_pmu_bw\n"); + return ret; + } + + /* Ignore the readonly reserved bit. */ + bw_bits &= ~BMG160_REG_PMU_BW_RES; + + for (i = 0; i < ARRAY_SIZE(bmg160_samp_freq_table); ++i) { + if (bmg160_samp_freq_table[i].bw_bits == bw_bits) + break; + } + + if (i == ARRAY_SIZE(bmg160_samp_freq_table)) + return -EINVAL; + + *val = bmg160_samp_freq_table[i].filter; + + return IIO_VAL_INT; +} + +static int bmg160_set_filter(struct bmg160_data *data, int val) +{ + struct device *dev = regmap_get_device(data->regmap); + int ret; + int i; + + for (i = 0; i < ARRAY_SIZE(bmg160_samp_freq_table); ++i) { + if (bmg160_samp_freq_table[i].filter == val) + break; + } + + if (i == ARRAY_SIZE(bmg160_samp_freq_table)) + return -EINVAL; + + ret = regmap_write(data->regmap, BMG160_REG_PMU_BW, + bmg160_samp_freq_table[i].bw_bits); + if (ret < 0) { + dev_err(dev, "Error writing reg_pmu_bw\n"); + return ret; + } return 0; } @@ -386,11 +435,23 @@ static int bmg160_setup_new_data_interrupt(struct bmg160_data *data, static int bmg160_get_bw(struct bmg160_data *data, int *val) { + struct device *dev = regmap_get_device(data->regmap); int i; + unsigned int bw_bits; + int ret; + + ret = regmap_read(data->regmap, BMG160_REG_PMU_BW, &bw_bits); + if (ret < 0) { + dev_err(dev, "Error reading reg_pmu_bw\n"); + return ret; + } + + /* Ignore the readonly reserved bit. 
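+	 * (BMG160_REG_PMU_BW_RES, bit 7, can read back as set; left
+	 * unmasked it would keep bw_bits from matching any table entry.)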
*/ + bw_bits &= ~BMG160_REG_PMU_BW_RES; for (i = 0; i < ARRAY_SIZE(bmg160_samp_freq_table); ++i) { - if (bmg160_samp_freq_table[i].bw_bits == data->bw_bits) { - *val = bmg160_samp_freq_table[i].val; + if (bmg160_samp_freq_table[i].bw_bits == bw_bits) { + *val = bmg160_samp_freq_table[i].odr; return IIO_VAL_INT; } } @@ -507,6 +568,8 @@ static int bmg160_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; } else return -EINVAL; + case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: + return bmg160_get_filter(data, val); case IIO_CHAN_INFO_SCALE: *val = 0; switch (chan->type) { @@ -571,6 +634,26 @@ static int bmg160_write_raw(struct iio_dev *indio_dev, ret = bmg160_set_power_state(data, false); mutex_unlock(&data->mutex); return ret; + case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: + if (val2) + return -EINVAL; + + mutex_lock(&data->mutex); + ret = bmg160_set_power_state(data, true); + if (ret < 0) { + bmg160_set_power_state(data, false); + mutex_unlock(&data->mutex); + return ret; + } + ret = bmg160_set_filter(data, val); + if (ret < 0) { + bmg160_set_power_state(data, false); + mutex_unlock(&data->mutex); + return ret; + } + ret = bmg160_set_power_state(data, false); + mutex_unlock(&data->mutex); + return ret; case IIO_CHAN_INFO_SCALE: if (val) return -EINVAL; @@ -728,7 +811,8 @@ static const struct iio_event_spec bmg160_event = { .channel2 = IIO_MOD_##_axis, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) | \ - BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + BIT(IIO_CHAN_INFO_SAMP_FREQ) | \ + BIT(IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY), \ .scan_index = AXIS_##_axis, \ .scan_type = { \ .sign = 's', \ @@ -885,25 +969,25 @@ static irqreturn_t bmg160_event_handler(int irq, void *private) if (val & BMG160_ANY_MOTION_BIT_X) iio_push_event(indio_dev, IIO_MOD_EVENT_CODE(IIO_ANGL_VEL, - 0, - IIO_MOD_X, - IIO_EV_TYPE_ROC, - dir), - iio_get_time_ns()); + 0, + IIO_MOD_X, + IIO_EV_TYPE_ROC, + dir), + iio_get_time_ns(indio_dev)); if (val & BMG160_ANY_MOTION_BIT_Y) iio_push_event(indio_dev, IIO_MOD_EVENT_CODE(IIO_ANGL_VEL, - 0, - IIO_MOD_Y, - IIO_EV_TYPE_ROC, - dir), - iio_get_time_ns()); + 0, + IIO_MOD_Y, + IIO_EV_TYPE_ROC, + dir), + iio_get_time_ns(indio_dev)); if (val & BMG160_ANY_MOTION_BIT_Z) iio_push_event(indio_dev, IIO_MOD_EVENT_CODE(IIO_ANGL_VEL, - 0, - IIO_MOD_Z, - IIO_EV_TYPE_ROC, - dir), - iio_get_time_ns()); + 0, + IIO_MOD_Z, + IIO_EV_TYPE_ROC, + dir), + iio_get_time_ns(indio_dev)); ack_intr_status: if (!data->dready_trigger_on) { diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c index a8012955a1f6..aea034d8fe0f 100644 --- a/drivers/iio/gyro/st_gyro_core.c +++ b/drivers/iio/gyro/st_gyro_core.c @@ -426,13 +426,15 @@ int st_gyro_common_probe(struct iio_dev *indio_dev) indio_dev->info = &gyro_info; mutex_init(&gdata->tb.buf_lock); - st_sensors_power_enable(indio_dev); + err = st_sensors_power_enable(indio_dev); + if (err) + return err; err = st_sensors_check_device_support(indio_dev, ARRAY_SIZE(st_gyro_sensors_settings), st_gyro_sensors_settings); if (err < 0) - return err; + goto st_gyro_power_off; gdata->num_data_channels = ST_GYRO_NUMBER_DATA_CHANNELS; gdata->multiread_bit = gdata->sensor_settings->multi_read_bit; @@ -446,11 +448,11 @@ int st_gyro_common_probe(struct iio_dev *indio_dev) err = st_sensors_init_sensor(indio_dev, (struct st_sensors_platform_data *)&gyro_pdata); if (err < 0) - return err; + goto st_gyro_power_off; err = st_gyro_allocate_ring(indio_dev); if (err < 0) - return err; + goto 
st_gyro_power_off; if (irq > 0) { err = st_sensors_allocate_trigger(indio_dev, @@ -473,6 +475,8 @@ st_gyro_device_register_error: st_sensors_deallocate_trigger(indio_dev); st_gyro_probe_trigger_error: st_gyro_deallocate_ring(indio_dev); +st_gyro_power_off: + st_sensors_power_disable(indio_dev); return err; } diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c index 88e43f87b926..9a081465c42f 100644 --- a/drivers/iio/health/afe4403.c +++ b/drivers/iio/health/afe4403.c @@ -1,7 +1,7 @@ /* * AFE4403 Heart Rate Monitors and Low-Cost Pulse Oximeters * - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/ * Andrew F. Davis * * This program is free software; you can redistribute it and/or modify @@ -39,127 +39,90 @@ #define AFE4403_TIAGAIN 0x20 #define AFE4403_TIA_AMB_GAIN 0x21 -/* AFE4403 GAIN register fields */ -#define AFE4403_TIAGAIN_RES_MASK GENMASK(2, 0) -#define AFE4403_TIAGAIN_RES_SHIFT 0 -#define AFE4403_TIAGAIN_CAP_MASK GENMASK(7, 3) -#define AFE4403_TIAGAIN_CAP_SHIFT 3 - -/* AFE4403 LEDCNTRL register fields */ -#define AFE440X_LEDCNTRL_LED1_MASK GENMASK(15, 8) -#define AFE440X_LEDCNTRL_LED1_SHIFT 8 -#define AFE440X_LEDCNTRL_LED2_MASK GENMASK(7, 0) -#define AFE440X_LEDCNTRL_LED2_SHIFT 0 -#define AFE440X_LEDCNTRL_LED_RANGE_MASK GENMASK(17, 16) -#define AFE440X_LEDCNTRL_LED_RANGE_SHIFT 16 - -/* AFE4403 CONTROL2 register fields */ -#define AFE440X_CONTROL2_PWR_DWN_TX BIT(2) -#define AFE440X_CONTROL2_EN_SLOW_DIAG BIT(8) -#define AFE440X_CONTROL2_DIAG_OUT_TRI BIT(10) -#define AFE440X_CONTROL2_TX_BRDG_MOD BIT(11) -#define AFE440X_CONTROL2_TX_REF_MASK GENMASK(18, 17) -#define AFE440X_CONTROL2_TX_REF_SHIFT 17 - -/* AFE4404 NULL fields */ -#define NULL_MASK 0 -#define NULL_SHIFT 0 - -/* AFE4403 LEDCNTRL values */ -#define AFE440X_LEDCNTRL_RANGE_TX_HALF 0x1 -#define AFE440X_LEDCNTRL_RANGE_TX_FULL 0x2 -#define AFE440X_LEDCNTRL_RANGE_TX_OFF 0x3 - -/* AFE4403 CONTROL2 values */ -#define AFE440X_CONTROL2_TX_REF_025 0x0 -#define AFE440X_CONTROL2_TX_REF_050 0x1 -#define AFE440X_CONTROL2_TX_REF_100 0x2 -#define AFE440X_CONTROL2_TX_REF_075 0x3 - -/* AFE4403 CONTROL3 values */ -#define AFE440X_CONTROL3_CLK_DIV_2 0x0 -#define AFE440X_CONTROL3_CLK_DIV_4 0x2 -#define AFE440X_CONTROL3_CLK_DIV_6 0x3 -#define AFE440X_CONTROL3_CLK_DIV_8 0x4 -#define AFE440X_CONTROL3_CLK_DIV_12 0x5 -#define AFE440X_CONTROL3_CLK_DIV_1 0x7 - -/* AFE4403 TIAGAIN_CAP values */ -#define AFE4403_TIAGAIN_CAP_5_P 0x0 -#define AFE4403_TIAGAIN_CAP_10_P 0x1 -#define AFE4403_TIAGAIN_CAP_20_P 0x2 -#define AFE4403_TIAGAIN_CAP_30_P 0x3 -#define AFE4403_TIAGAIN_CAP_55_P 0x8 -#define AFE4403_TIAGAIN_CAP_155_P 0x10 - -/* AFE4403 TIAGAIN_RES values */ -#define AFE4403_TIAGAIN_RES_500_K 0x0 -#define AFE4403_TIAGAIN_RES_250_K 0x1 -#define AFE4403_TIAGAIN_RES_100_K 0x2 -#define AFE4403_TIAGAIN_RES_50_K 0x3 -#define AFE4403_TIAGAIN_RES_25_K 0x4 -#define AFE4403_TIAGAIN_RES_10_K 0x5 -#define AFE4403_TIAGAIN_RES_1_M 0x6 -#define AFE4403_TIAGAIN_RES_NONE 0x7 +enum afe4403_fields { + /* Gains */ + F_RF_LED1, F_CF_LED1, + F_RF_LED, F_CF_LED, + + /* LED Current */ + F_ILED1, F_ILED2, + + /* sentinel */ + F_MAX_FIELDS +}; + +static const struct reg_field afe4403_reg_fields[] = { + /* Gains */ + [F_RF_LED1] = REG_FIELD(AFE4403_TIAGAIN, 0, 2), + [F_CF_LED1] = REG_FIELD(AFE4403_TIAGAIN, 3, 7), + [F_RF_LED] = REG_FIELD(AFE4403_TIA_AMB_GAIN, 0, 2), + [F_CF_LED] = REG_FIELD(AFE4403_TIA_AMB_GAIN, 3, 7), + /* LED Current */ + [F_ILED1] = 
REG_FIELD(AFE440X_LEDCNTRL, 0, 7), + [F_ILED2] = REG_FIELD(AFE440X_LEDCNTRL, 8, 15), +}; /** - * struct afe4403_data - * @dev - Device structure - * @spi - SPI device handle - * @regmap - Register map of the device - * @regulator - Pointer to the regulator for the IC - * @trig - IIO trigger for this device - * @irq - ADC_RDY line interrupt number + * struct afe4403_data - AFE4403 device instance data + * @dev: Device structure + * @spi: SPI device handle + * @regmap: Register map of the device + * @fields: Register fields of the device + * @regulator: Pointer to the regulator for the IC + * @trig: IIO trigger for this device + * @irq: ADC_RDY line interrupt number */ struct afe4403_data { struct device *dev; struct spi_device *spi; struct regmap *regmap; + struct regmap_field *fields[F_MAX_FIELDS]; struct regulator *regulator; struct iio_trigger *trig; int irq; }; enum afe4403_chan_id { + LED2 = 1, + ALED2, LED1, ALED1, - LED2, - ALED2, - LED1_ALED1, LED2_ALED2, - ILED1, - ILED2, + LED1_ALED1, }; -static const struct afe440x_reg_info afe4403_reg_info[] = { - [LED1] = AFE440X_REG_INFO(AFE440X_LED1VAL, 0, NULL), - [ALED1] = AFE440X_REG_INFO(AFE440X_ALED1VAL, 0, NULL), - [LED2] = AFE440X_REG_INFO(AFE440X_LED2VAL, 0, NULL), - [ALED2] = AFE440X_REG_INFO(AFE440X_ALED2VAL, 0, NULL), - [LED1_ALED1] = AFE440X_REG_INFO(AFE440X_LED1_ALED1VAL, 0, NULL), - [LED2_ALED2] = AFE440X_REG_INFO(AFE440X_LED2_ALED2VAL, 0, NULL), - [ILED1] = AFE440X_REG_INFO(AFE440X_LEDCNTRL, 0, AFE440X_LEDCNTRL_LED1), - [ILED2] = AFE440X_REG_INFO(AFE440X_LEDCNTRL, 0, AFE440X_LEDCNTRL_LED2), +static const unsigned int afe4403_channel_values[] = { + [LED2] = AFE440X_LED2VAL, + [ALED2] = AFE440X_ALED2VAL, + [LED1] = AFE440X_LED1VAL, + [ALED1] = AFE440X_ALED1VAL, + [LED2_ALED2] = AFE440X_LED2_ALED2VAL, + [LED1_ALED1] = AFE440X_LED1_ALED1VAL, +}; + +static const unsigned int afe4403_channel_leds[] = { + [LED2] = F_ILED2, + [LED1] = F_ILED1, }; static const struct iio_chan_spec afe4403_channels[] = { /* ADC values */ - AFE440X_INTENSITY_CHAN(LED1, "led1", 0), - AFE440X_INTENSITY_CHAN(ALED1, "led1_ambient", 0), - AFE440X_INTENSITY_CHAN(LED2, "led2", 0), - AFE440X_INTENSITY_CHAN(ALED2, "led2_ambient", 0), - AFE440X_INTENSITY_CHAN(LED1_ALED1, "led1-led1_ambient", 0), - AFE440X_INTENSITY_CHAN(LED2_ALED2, "led2-led2_ambient", 0), + AFE440X_INTENSITY_CHAN(LED2, 0), + AFE440X_INTENSITY_CHAN(ALED2, 0), + AFE440X_INTENSITY_CHAN(LED1, 0), + AFE440X_INTENSITY_CHAN(ALED1, 0), + AFE440X_INTENSITY_CHAN(LED2_ALED2, 0), + AFE440X_INTENSITY_CHAN(LED1_ALED1, 0), /* LED current */ - AFE440X_CURRENT_CHAN(ILED1, "led1"), - AFE440X_CURRENT_CHAN(ILED2, "led2"), + AFE440X_CURRENT_CHAN(LED2), + AFE440X_CURRENT_CHAN(LED1), }; static const struct afe440x_val_table afe4403_res_table[] = { { 500000 }, { 250000 }, { 100000 }, { 50000 }, { 25000 }, { 10000 }, { 1000000 }, { 0 }, }; -AFE440X_TABLE_ATTR(tia_resistance_available, afe4403_res_table); +AFE440X_TABLE_ATTR(in_intensity_resistance_available, afe4403_res_table); static const struct afe440x_val_table afe4403_cap_table[] = { { 0, 5000 }, { 0, 10000 }, { 0, 20000 }, { 0, 25000 }, @@ -171,7 +134,7 @@ static const struct afe440x_val_table afe4403_cap_table[] = { { 0, 205000 }, { 0, 210000 }, { 0, 220000 }, { 0, 225000 }, { 0, 230000 }, { 0, 235000 }, { 0, 245000 }, { 0, 250000 }, }; -AFE440X_TABLE_ATTR(tia_capacitance_available, afe4403_cap_table); +AFE440X_TABLE_ATTR(in_intensity_capacitance_available, afe4403_cap_table); static ssize_t afe440x_show_register(struct device *dev, struct device_attribute *attr, 
@@ -180,38 +143,21 @@ static ssize_t afe440x_show_register(struct device *dev, struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct afe4403_data *afe = iio_priv(indio_dev); struct afe440x_attr *afe440x_attr = to_afe440x_attr(attr); - unsigned int reg_val, type; + unsigned int reg_val; int vals[2]; - int ret, val_len; + int ret; - ret = regmap_read(afe->regmap, afe440x_attr->reg, ®_val); + ret = regmap_field_read(afe->fields[afe440x_attr->field], ®_val); if (ret) return ret; - reg_val &= afe440x_attr->mask; - reg_val >>= afe440x_attr->shift; - - switch (afe440x_attr->type) { - case SIMPLE: - type = IIO_VAL_INT; - val_len = 1; - vals[0] = reg_val; - break; - case RESISTANCE: - case CAPACITANCE: - type = IIO_VAL_INT_PLUS_MICRO; - val_len = 2; - if (reg_val < afe440x_attr->table_size) { - vals[0] = afe440x_attr->val_table[reg_val].integer; - vals[1] = afe440x_attr->val_table[reg_val].fract; - break; - } - return -EINVAL; - default: + if (reg_val >= afe440x_attr->table_size) return -EINVAL; - } - return iio_format_value(buf, type, val_len, vals); + vals[0] = afe440x_attr->val_table[reg_val].integer; + vals[1] = afe440x_attr->val_table[reg_val].fract; + + return iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO, 2, vals); } static ssize_t afe440x_store_register(struct device *dev, @@ -227,48 +173,43 @@ static ssize_t afe440x_store_register(struct device *dev, if (ret) return ret; - switch (afe440x_attr->type) { - case SIMPLE: - val = integer; - break; - case RESISTANCE: - case CAPACITANCE: - for (val = 0; val < afe440x_attr->table_size; val++) - if (afe440x_attr->val_table[val].integer == integer && - afe440x_attr->val_table[val].fract == fract) - break; - if (val == afe440x_attr->table_size) - return -EINVAL; - break; - default: + for (val = 0; val < afe440x_attr->table_size; val++) + if (afe440x_attr->val_table[val].integer == integer && + afe440x_attr->val_table[val].fract == fract) + break; + if (val == afe440x_attr->table_size) return -EINVAL; - } - ret = regmap_update_bits(afe->regmap, afe440x_attr->reg, - afe440x_attr->mask, - (val << afe440x_attr->shift)); + ret = regmap_field_write(afe->fields[afe440x_attr->field], val); if (ret) return ret; return count; } -static AFE440X_ATTR(tia_separate_en, AFE4403_TIAGAIN, AFE440X_TIAGAIN_ENSEPGAIN, SIMPLE, NULL, 0); +static AFE440X_ATTR(in_intensity1_resistance, F_RF_LED, afe4403_res_table); +static AFE440X_ATTR(in_intensity1_capacitance, F_CF_LED, afe4403_cap_table); + +static AFE440X_ATTR(in_intensity2_resistance, F_RF_LED, afe4403_res_table); +static AFE440X_ATTR(in_intensity2_capacitance, F_CF_LED, afe4403_cap_table); -static AFE440X_ATTR(tia_resistance1, AFE4403_TIAGAIN, AFE4403_TIAGAIN_RES, RESISTANCE, afe4403_res_table, ARRAY_SIZE(afe4403_res_table)); -static AFE440X_ATTR(tia_capacitance1, AFE4403_TIAGAIN, AFE4403_TIAGAIN_CAP, CAPACITANCE, afe4403_cap_table, ARRAY_SIZE(afe4403_cap_table)); +static AFE440X_ATTR(in_intensity3_resistance, F_RF_LED1, afe4403_res_table); +static AFE440X_ATTR(in_intensity3_capacitance, F_CF_LED1, afe4403_cap_table); -static AFE440X_ATTR(tia_resistance2, AFE4403_TIA_AMB_GAIN, AFE4403_TIAGAIN_RES, RESISTANCE, afe4403_res_table, ARRAY_SIZE(afe4403_res_table)); -static AFE440X_ATTR(tia_capacitance2, AFE4403_TIA_AMB_GAIN, AFE4403_TIAGAIN_RES, CAPACITANCE, afe4403_cap_table, ARRAY_SIZE(afe4403_cap_table)); +static AFE440X_ATTR(in_intensity4_resistance, F_RF_LED1, afe4403_res_table); +static AFE440X_ATTR(in_intensity4_capacitance, F_CF_LED1, afe4403_cap_table); static struct attribute *afe440x_attributes[] = { - 
&afe440x_attr_tia_separate_en.dev_attr.attr, - &afe440x_attr_tia_resistance1.dev_attr.attr, - &afe440x_attr_tia_capacitance1.dev_attr.attr, - &afe440x_attr_tia_resistance2.dev_attr.attr, - &afe440x_attr_tia_capacitance2.dev_attr.attr, - &dev_attr_tia_resistance_available.attr, - &dev_attr_tia_capacitance_available.attr, + &dev_attr_in_intensity_resistance_available.attr, + &dev_attr_in_intensity_capacitance_available.attr, + &afe440x_attr_in_intensity1_resistance.dev_attr.attr, + &afe440x_attr_in_intensity1_capacitance.dev_attr.attr, + &afe440x_attr_in_intensity2_resistance.dev_attr.attr, + &afe440x_attr_in_intensity2_capacitance.dev_attr.attr, + &afe440x_attr_in_intensity3_resistance.dev_attr.attr, + &afe440x_attr_in_intensity3_capacitance.dev_attr.attr, + &afe440x_attr_in_intensity4_resistance.dev_attr.attr, + &afe440x_attr_in_intensity4_capacitance.dev_attr.attr, NULL }; @@ -309,35 +250,26 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct afe4403_data *afe = iio_priv(indio_dev); - const struct afe440x_reg_info reg_info = afe4403_reg_info[chan->address]; + unsigned int reg = afe4403_channel_values[chan->address]; + unsigned int field = afe4403_channel_leds[chan->address]; int ret; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_RAW: - ret = afe4403_read(afe, reg_info.reg, val); - if (ret) - return ret; - return IIO_VAL_INT; - case IIO_CHAN_INFO_OFFSET: - ret = regmap_read(afe->regmap, reg_info.offreg, - val); + ret = afe4403_read(afe, reg, val); if (ret) return ret; - *val &= reg_info.mask; - *val >>= reg_info.shift; return IIO_VAL_INT; } break; case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: - ret = regmap_read(afe->regmap, reg_info.reg, val); + ret = regmap_field_read(afe->fields[field], val); if (ret) return ret; - *val &= reg_info.mask; - *val >>= reg_info.shift; return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: *val = 0; @@ -357,25 +289,13 @@ static int afe4403_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct afe4403_data *afe = iio_priv(indio_dev); - const struct afe440x_reg_info reg_info = afe4403_reg_info[chan->address]; + unsigned int field = afe4403_channel_leds[chan->address]; switch (chan->type) { - case IIO_INTENSITY: - switch (mask) { - case IIO_CHAN_INFO_OFFSET: - return regmap_update_bits(afe->regmap, - reg_info.offreg, - reg_info.mask, - (val << reg_info.shift)); - } - break; case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: - return regmap_update_bits(afe->regmap, - reg_info.reg, - reg_info.mask, - (val << reg_info.shift)); + return regmap_field_write(afe->fields[field], val); } break; default: @@ -410,7 +330,7 @@ static irqreturn_t afe4403_trigger_handler(int irq, void *private) for_each_set_bit(bit, indio_dev->active_scan_mask, indio_dev->masklength) { ret = spi_write_then_read(afe->spi, - &afe4403_reg_info[bit].reg, 1, + &afe4403_channel_values[bit], 1, rx, 3); if (ret) goto err; @@ -472,12 +392,8 @@ static const struct iio_trigger_ops afe4403_trigger_ops = { static const struct reg_sequence afe4403_reg_sequences[] = { AFE4403_TIMING_PAIRS, - { AFE440X_CONTROL1, AFE440X_CONTROL1_TIMEREN | 0x000007}, - { AFE4403_TIA_AMB_GAIN, AFE4403_TIAGAIN_RES_1_M }, - { AFE440X_LEDCNTRL, (0x14 << AFE440X_LEDCNTRL_LED1_SHIFT) | - (0x14 << AFE440X_LEDCNTRL_LED2_SHIFT) }, - { AFE440X_CONTROL2, AFE440X_CONTROL2_TX_REF_050 << - AFE440X_CONTROL2_TX_REF_SHIFT }, + { AFE440X_CONTROL1, AFE440X_CONTROL1_TIMEREN }, + { AFE4403_TIAGAIN, AFE440X_TIAGAIN_ENSEPGAIN }, }; static 
const struct regmap_range afe4403_yes_ranges[] = { @@ -498,13 +414,11 @@ static const struct regmap_config afe4403_regmap_config = { .volatile_table = &afe4403_volatile_table, }; -#ifdef CONFIG_OF static const struct of_device_id afe4403_of_match[] = { { .compatible = "ti,afe4403", }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, afe4403_of_match); -#endif static int __maybe_unused afe4403_suspend(struct device *dev) { @@ -553,7 +467,7 @@ static int afe4403_probe(struct spi_device *spi) { struct iio_dev *indio_dev; struct afe4403_data *afe; - int ret; + int i, ret; indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*afe)); if (!indio_dev) @@ -572,6 +486,15 @@ static int afe4403_probe(struct spi_device *spi) return PTR_ERR(afe->regmap); } + for (i = 0; i < F_MAX_FIELDS; i++) { + afe->fields[i] = devm_regmap_field_alloc(afe->dev, afe->regmap, + afe4403_reg_fields[i]); + if (IS_ERR(afe->fields[i])) { + dev_err(afe->dev, "Unable to allocate regmap fields\n"); + return PTR_ERR(afe->fields[i]); + } + } + afe->regulator = devm_regulator_get(afe->dev, "tx_sup"); if (IS_ERR(afe->regulator)) { dev_err(afe->dev, "Unable to get regulator\n"); @@ -694,7 +617,7 @@ MODULE_DEVICE_TABLE(spi, afe4403_ids); static struct spi_driver afe4403_spi_driver = { .driver = { .name = AFE4403_DRIVER_NAME, - .of_match_table = of_match_ptr(afe4403_of_match), + .of_match_table = afe4403_of_match, .pm = &afe4403_pm_ops, }, .probe = afe4403_probe, @@ -704,5 +627,5 @@ static struct spi_driver afe4403_spi_driver = { module_spi_driver(afe4403_spi_driver); MODULE_AUTHOR("Andrew F. Davis "); -MODULE_DESCRIPTION("TI AFE4403 Heart Rate and Pulse Oximeter"); +MODULE_DESCRIPTION("TI AFE4403 Heart Rate Monitor and Pulse Oximeter AFE"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c index 5096a4643784..45266404f7e3 100644 --- a/drivers/iio/health/afe4404.c +++ b/drivers/iio/health/afe4404.c @@ -1,7 +1,7 @@ /* * AFE4404 Heart Rate Monitors and Low-Cost Pulse Oximeters * - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/ * Andrew F. 
Davis * * This program is free software; you can redistribute it and/or modify @@ -48,118 +48,102 @@ #define AFE4404_AVG_LED2_ALED2VAL 0x3f #define AFE4404_AVG_LED1_ALED1VAL 0x40 -/* AFE4404 GAIN register fields */ -#define AFE4404_TIA_GAIN_RES_MASK GENMASK(2, 0) -#define AFE4404_TIA_GAIN_RES_SHIFT 0 -#define AFE4404_TIA_GAIN_CAP_MASK GENMASK(5, 3) -#define AFE4404_TIA_GAIN_CAP_SHIFT 3 +/* AFE4404 CONTROL2 register fields */ +#define AFE440X_CONTROL2_OSC_ENABLE BIT(9) -/* AFE4404 LEDCNTRL register fields */ -#define AFE4404_LEDCNTRL_ILED1_MASK GENMASK(5, 0) -#define AFE4404_LEDCNTRL_ILED1_SHIFT 0 -#define AFE4404_LEDCNTRL_ILED2_MASK GENMASK(11, 6) -#define AFE4404_LEDCNTRL_ILED2_SHIFT 6 -#define AFE4404_LEDCNTRL_ILED3_MASK GENMASK(17, 12) -#define AFE4404_LEDCNTRL_ILED3_SHIFT 12 +enum afe4404_fields { + /* Gains */ + F_TIA_GAIN_SEP, F_TIA_CF_SEP, + F_TIA_GAIN, TIA_CF, -/* AFE4404 CONTROL2 register fields */ -#define AFE440X_CONTROL2_ILED_2X_MASK BIT(17) -#define AFE440X_CONTROL2_ILED_2X_SHIFT 17 - -/* AFE4404 CONTROL3 register fields */ -#define AFE440X_CONTROL3_OSC_ENABLE BIT(9) - -/* AFE4404 OFFDAC register current fields */ -#define AFE4404_OFFDAC_CURR_LED1_MASK GENMASK(9, 5) -#define AFE4404_OFFDAC_CURR_LED1_SHIFT 5 -#define AFE4404_OFFDAC_CURR_LED2_MASK GENMASK(19, 15) -#define AFE4404_OFFDAC_CURR_LED2_SHIFT 15 -#define AFE4404_OFFDAC_CURR_LED3_MASK GENMASK(4, 0) -#define AFE4404_OFFDAC_CURR_LED3_SHIFT 0 -#define AFE4404_OFFDAC_CURR_ALED1_MASK GENMASK(14, 10) -#define AFE4404_OFFDAC_CURR_ALED1_SHIFT 10 -#define AFE4404_OFFDAC_CURR_ALED2_MASK GENMASK(4, 0) -#define AFE4404_OFFDAC_CURR_ALED2_SHIFT 0 - -/* AFE4404 NULL fields */ -#define NULL_MASK 0 -#define NULL_SHIFT 0 - -/* AFE4404 TIA_GAIN_CAP values */ -#define AFE4404_TIA_GAIN_CAP_5_P 0x0 -#define AFE4404_TIA_GAIN_CAP_2_5_P 0x1 -#define AFE4404_TIA_GAIN_CAP_10_P 0x2 -#define AFE4404_TIA_GAIN_CAP_7_5_P 0x3 -#define AFE4404_TIA_GAIN_CAP_20_P 0x4 -#define AFE4404_TIA_GAIN_CAP_17_5_P 0x5 -#define AFE4404_TIA_GAIN_CAP_25_P 0x6 -#define AFE4404_TIA_GAIN_CAP_22_5_P 0x7 - -/* AFE4404 TIA_GAIN_RES values */ -#define AFE4404_TIA_GAIN_RES_500_K 0x0 -#define AFE4404_TIA_GAIN_RES_250_K 0x1 -#define AFE4404_TIA_GAIN_RES_100_K 0x2 -#define AFE4404_TIA_GAIN_RES_50_K 0x3 -#define AFE4404_TIA_GAIN_RES_25_K 0x4 -#define AFE4404_TIA_GAIN_RES_10_K 0x5 -#define AFE4404_TIA_GAIN_RES_1_M 0x6 -#define AFE4404_TIA_GAIN_RES_2_M 0x7 + /* LED Current */ + F_ILED1, F_ILED2, F_ILED3, + + /* Offset DAC */ + F_OFFDAC_AMB2, F_OFFDAC_LED1, F_OFFDAC_AMB1, F_OFFDAC_LED2, + + /* sentinel */ + F_MAX_FIELDS +}; + +static const struct reg_field afe4404_reg_fields[] = { + /* Gains */ + [F_TIA_GAIN_SEP] = REG_FIELD(AFE4404_TIA_GAIN_SEP, 0, 2), + [F_TIA_CF_SEP] = REG_FIELD(AFE4404_TIA_GAIN_SEP, 3, 5), + [F_TIA_GAIN] = REG_FIELD(AFE4404_TIA_GAIN, 0, 2), + [TIA_CF] = REG_FIELD(AFE4404_TIA_GAIN, 3, 5), + /* LED Current */ + [F_ILED1] = REG_FIELD(AFE440X_LEDCNTRL, 0, 5), + [F_ILED2] = REG_FIELD(AFE440X_LEDCNTRL, 6, 11), + [F_ILED3] = REG_FIELD(AFE440X_LEDCNTRL, 12, 17), + /* Offset DAC */ + [F_OFFDAC_AMB2] = REG_FIELD(AFE4404_OFFDAC, 0, 4), + [F_OFFDAC_LED1] = REG_FIELD(AFE4404_OFFDAC, 5, 9), + [F_OFFDAC_AMB1] = REG_FIELD(AFE4404_OFFDAC, 10, 14), + [F_OFFDAC_LED2] = REG_FIELD(AFE4404_OFFDAC, 15, 19), +}; /** - * struct afe4404_data - * @dev - Device structure - * @regmap - Register map of the device - * @regulator - Pointer to the regulator for the IC - * @trig - IIO trigger for this device - * @irq - ADC_RDY line interrupt number + * struct afe4404_data - AFE4404 device 
instance data + * @dev: Device structure + * @regmap: Register map of the device + * @fields: Register fields of the device + * @regulator: Pointer to the regulator for the IC + * @trig: IIO trigger for this device + * @irq: ADC_RDY line interrupt number */ struct afe4404_data { struct device *dev; struct regmap *regmap; + struct regmap_field *fields[F_MAX_FIELDS]; struct regulator *regulator; struct iio_trigger *trig; int irq; }; enum afe4404_chan_id { + LED2 = 1, + ALED2, LED1, ALED1, - LED2, - ALED2, - LED3, - LED1_ALED1, LED2_ALED2, - ILED1, - ILED2, - ILED3, + LED1_ALED1, +}; + +static const unsigned int afe4404_channel_values[] = { + [LED2] = AFE440X_LED2VAL, + [ALED2] = AFE440X_ALED2VAL, + [LED1] = AFE440X_LED1VAL, + [ALED1] = AFE440X_ALED1VAL, + [LED2_ALED2] = AFE440X_LED2_ALED2VAL, + [LED1_ALED1] = AFE440X_LED1_ALED1VAL, }; -static const struct afe440x_reg_info afe4404_reg_info[] = { - [LED1] = AFE440X_REG_INFO(AFE440X_LED1VAL, AFE4404_OFFDAC, AFE4404_OFFDAC_CURR_LED1), - [ALED1] = AFE440X_REG_INFO(AFE440X_ALED1VAL, AFE4404_OFFDAC, AFE4404_OFFDAC_CURR_ALED1), - [LED2] = AFE440X_REG_INFO(AFE440X_LED2VAL, AFE4404_OFFDAC, AFE4404_OFFDAC_CURR_LED2), - [ALED2] = AFE440X_REG_INFO(AFE440X_ALED2VAL, AFE4404_OFFDAC, AFE4404_OFFDAC_CURR_ALED2), - [LED3] = AFE440X_REG_INFO(AFE440X_ALED2VAL, 0, NULL), - [LED1_ALED1] = AFE440X_REG_INFO(AFE440X_LED1_ALED1VAL, 0, NULL), - [LED2_ALED2] = AFE440X_REG_INFO(AFE440X_LED2_ALED2VAL, 0, NULL), - [ILED1] = AFE440X_REG_INFO(AFE440X_LEDCNTRL, 0, AFE4404_LEDCNTRL_ILED1), - [ILED2] = AFE440X_REG_INFO(AFE440X_LEDCNTRL, 0, AFE4404_LEDCNTRL_ILED2), - [ILED3] = AFE440X_REG_INFO(AFE440X_LEDCNTRL, 0, AFE4404_LEDCNTRL_ILED3), +static const unsigned int afe4404_channel_leds[] = { + [LED2] = F_ILED2, + [ALED2] = F_ILED3, + [LED1] = F_ILED1, +}; + +static const unsigned int afe4404_channel_offdacs[] = { + [LED2] = F_OFFDAC_LED2, + [ALED2] = F_OFFDAC_AMB2, + [LED1] = F_OFFDAC_LED1, + [ALED1] = F_OFFDAC_AMB1, }; static const struct iio_chan_spec afe4404_channels[] = { /* ADC values */ - AFE440X_INTENSITY_CHAN(LED1, "led1", BIT(IIO_CHAN_INFO_OFFSET)), - AFE440X_INTENSITY_CHAN(ALED1, "led1_ambient", BIT(IIO_CHAN_INFO_OFFSET)), - AFE440X_INTENSITY_CHAN(LED2, "led2", BIT(IIO_CHAN_INFO_OFFSET)), - AFE440X_INTENSITY_CHAN(ALED2, "led2_ambient", BIT(IIO_CHAN_INFO_OFFSET)), - AFE440X_INTENSITY_CHAN(LED3, "led3", BIT(IIO_CHAN_INFO_OFFSET)), - AFE440X_INTENSITY_CHAN(LED1_ALED1, "led1-led1_ambient", 0), - AFE440X_INTENSITY_CHAN(LED2_ALED2, "led2-led2_ambient", 0), + AFE440X_INTENSITY_CHAN(LED2, BIT(IIO_CHAN_INFO_OFFSET)), + AFE440X_INTENSITY_CHAN(ALED2, BIT(IIO_CHAN_INFO_OFFSET)), + AFE440X_INTENSITY_CHAN(LED1, BIT(IIO_CHAN_INFO_OFFSET)), + AFE440X_INTENSITY_CHAN(ALED1, BIT(IIO_CHAN_INFO_OFFSET)), + AFE440X_INTENSITY_CHAN(LED2_ALED2, 0), + AFE440X_INTENSITY_CHAN(LED1_ALED1, 0), /* LED current */ - AFE440X_CURRENT_CHAN(ILED1, "led1"), - AFE440X_CURRENT_CHAN(ILED2, "led2"), - AFE440X_CURRENT_CHAN(ILED3, "led3"), + AFE440X_CURRENT_CHAN(LED2), + AFE440X_CURRENT_CHAN(ALED2), + AFE440X_CURRENT_CHAN(LED1), }; static const struct afe440x_val_table afe4404_res_table[] = { @@ -172,7 +156,7 @@ static const struct afe440x_val_table afe4404_res_table[] = { { .integer = 1000000, .fract = 0 }, { .integer = 2000000, .fract = 0 }, }; -AFE440X_TABLE_ATTR(tia_resistance_available, afe4404_res_table); +AFE440X_TABLE_ATTR(in_intensity_resistance_available, afe4404_res_table); static const struct afe440x_val_table afe4404_cap_table[] = { { .integer = 0, .fract = 5000 }, @@ -184,7 +168,7 @@ static const 
struct afe440x_val_table afe4404_cap_table[] = { { .integer = 0, .fract = 25000 }, { .integer = 0, .fract = 22500 }, }; -AFE440X_TABLE_ATTR(tia_capacitance_available, afe4404_cap_table); +AFE440X_TABLE_ATTR(in_intensity_capacitance_available, afe4404_cap_table); static ssize_t afe440x_show_register(struct device *dev, struct device_attribute *attr, @@ -193,38 +177,21 @@ static ssize_t afe440x_show_register(struct device *dev, struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct afe4404_data *afe = iio_priv(indio_dev); struct afe440x_attr *afe440x_attr = to_afe440x_attr(attr); - unsigned int reg_val, type; + unsigned int reg_val; int vals[2]; - int ret, val_len; + int ret; - ret = regmap_read(afe->regmap, afe440x_attr->reg, &reg_val); + ret = regmap_field_read(afe->fields[afe440x_attr->field], &reg_val); if (ret) return ret; - reg_val &= afe440x_attr->mask; - reg_val >>= afe440x_attr->shift; - - switch (afe440x_attr->type) { - case SIMPLE: - type = IIO_VAL_INT; - val_len = 1; - vals[0] = reg_val; - break; - case RESISTANCE: - case CAPACITANCE: - type = IIO_VAL_INT_PLUS_MICRO; - val_len = 2; - if (reg_val < afe440x_attr->table_size) { - vals[0] = afe440x_attr->val_table[reg_val].integer; - vals[1] = afe440x_attr->val_table[reg_val].fract; - break; - } - return -EINVAL; - default: + if (reg_val >= afe440x_attr->table_size) return -EINVAL; - } - return iio_format_value(buf, type, val_len, vals); + vals[0] = afe440x_attr->val_table[reg_val].integer; + vals[1] = afe440x_attr->val_table[reg_val].fract; + + return iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO, 2, vals); } static ssize_t afe440x_store_register(struct device *dev, @@ -240,48 +207,43 @@ static ssize_t afe440x_store_register(struct device *dev, if (ret) return ret; - switch (afe440x_attr->type) { - case SIMPLE: - val = integer; - break; - case RESISTANCE: - case CAPACITANCE: - for (val = 0; val < afe440x_attr->table_size; val++) - if (afe440x_attr->val_table[val].integer == integer && - afe440x_attr->val_table[val].fract == fract) - break; - if (val == afe440x_attr->table_size) - return -EINVAL; - break; - default: + for (val = 0; val < afe440x_attr->table_size; val++) + if (afe440x_attr->val_table[val].integer == integer && + afe440x_attr->val_table[val].fract == fract) + break; + if (val == afe440x_attr->table_size) return -EINVAL; - } - ret = regmap_update_bits(afe->regmap, afe440x_attr->reg, - afe440x_attr->mask, - (val << afe440x_attr->shift)); + ret = regmap_field_write(afe->fields[afe440x_attr->field], val); if (ret) return ret; return count; } -static AFE440X_ATTR(tia_separate_en, AFE4404_TIA_GAIN_SEP, AFE440X_TIAGAIN_ENSEPGAIN, SIMPLE, NULL, 0); +static AFE440X_ATTR(in_intensity1_resistance, F_TIA_GAIN_SEP, afe4404_res_table); +static AFE440X_ATTR(in_intensity1_capacitance, F_TIA_CF_SEP, afe4404_cap_table); + +static AFE440X_ATTR(in_intensity2_resistance, F_TIA_GAIN_SEP, afe4404_res_table); +static AFE440X_ATTR(in_intensity2_capacitance, F_TIA_CF_SEP, afe4404_cap_table); -static AFE440X_ATTR(tia_resistance1, AFE4404_TIA_GAIN, AFE4404_TIA_GAIN_RES, RESISTANCE, afe4404_res_table, ARRAY_SIZE(afe4404_res_table)); -static AFE440X_ATTR(tia_capacitance1, AFE4404_TIA_GAIN, AFE4404_TIA_GAIN_CAP, CAPACITANCE, afe4404_cap_table, ARRAY_SIZE(afe4404_cap_table)); +static AFE440X_ATTR(in_intensity3_resistance, F_TIA_GAIN, afe4404_res_table); +static AFE440X_ATTR(in_intensity3_capacitance, TIA_CF, afe4404_cap_table); -static AFE440X_ATTR(tia_resistance2, AFE4404_TIA_GAIN_SEP, AFE4404_TIA_GAIN_RES, RESISTANCE, afe4404_res_table, 
ARRAY_SIZE(afe4404_res_table)); -static AFE440X_ATTR(tia_capacitance2, AFE4404_TIA_GAIN_SEP, AFE4404_TIA_GAIN_CAP, CAPACITANCE, afe4404_cap_table, ARRAY_SIZE(afe4404_cap_table)); +static AFE440X_ATTR(in_intensity4_resistance, F_TIA_GAIN, afe4404_res_table); +static AFE440X_ATTR(in_intensity4_capacitance, TIA_CF, afe4404_cap_table); static struct attribute *afe440x_attributes[] = { - &afe440x_attr_tia_separate_en.dev_attr.attr, - &afe440x_attr_tia_resistance1.dev_attr.attr, - &afe440x_attr_tia_capacitance1.dev_attr.attr, - &afe440x_attr_tia_resistance2.dev_attr.attr, - &afe440x_attr_tia_capacitance2.dev_attr.attr, - &dev_attr_tia_resistance_available.attr, - &dev_attr_tia_capacitance_available.attr, + &dev_attr_in_intensity_resistance_available.attr, + &dev_attr_in_intensity_capacitance_available.attr, + &afe440x_attr_in_intensity1_resistance.dev_attr.attr, + &afe440x_attr_in_intensity1_capacitance.dev_attr.attr, + &afe440x_attr_in_intensity2_resistance.dev_attr.attr, + &afe440x_attr_in_intensity2_capacitance.dev_attr.attr, + &afe440x_attr_in_intensity3_resistance.dev_attr.attr, + &afe440x_attr_in_intensity3_capacitance.dev_attr.attr, + &afe440x_attr_in_intensity4_resistance.dev_attr.attr, + &afe440x_attr_in_intensity4_capacitance.dev_attr.attr, NULL }; @@ -294,35 +256,32 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct afe4404_data *afe = iio_priv(indio_dev); - const struct afe440x_reg_info reg_info = afe4404_reg_info[chan->address]; + unsigned int value_reg = afe4404_channel_values[chan->address]; + unsigned int led_field = afe4404_channel_leds[chan->address]; + unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; int ret; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_RAW: - ret = regmap_read(afe->regmap, reg_info.reg, val); + ret = regmap_read(afe->regmap, value_reg, val); if (ret) return ret; return IIO_VAL_INT; case IIO_CHAN_INFO_OFFSET: - ret = regmap_read(afe->regmap, reg_info.offreg, - val); + ret = regmap_field_read(afe->fields[offdac_field], val); if (ret) return ret; - *val &= reg_info.mask; - *val >>= reg_info.shift; return IIO_VAL_INT; } break; case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: - ret = regmap_read(afe->regmap, reg_info.reg, val); + ret = regmap_field_read(afe->fields[led_field], val); if (ret) return ret; - *val &= reg_info.mask; - *val >>= reg_info.shift; return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: *val = 0; @@ -342,25 +301,20 @@ static int afe4404_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct afe4404_data *afe = iio_priv(indio_dev); - const struct afe440x_reg_info reg_info = afe4404_reg_info[chan->address]; + unsigned int led_field = afe4404_channel_leds[chan->address]; + unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_OFFSET: - return regmap_update_bits(afe->regmap, - reg_info.offreg, - reg_info.mask, - (val << reg_info.shift)); + return regmap_field_write(afe->fields[offdac_field], val); } break; case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: - return regmap_update_bits(afe->regmap, - reg_info.reg, - reg_info.mask, - (val << reg_info.shift)); + return regmap_field_write(afe->fields[led_field], val); } break; default: @@ -387,7 +341,7 @@ static irqreturn_t afe4404_trigger_handler(int irq, void *private) for_each_set_bit(bit, indio_dev->active_scan_mask, indio_dev->masklength) { - ret = regmap_read(afe->regmap, 
afe4404_reg_info[bit].reg, + ret = regmap_read(afe->regmap, afe4404_channel_values[bit], &buffer[i++]); if (ret) goto err; @@ -443,11 +397,8 @@ static const struct iio_trigger_ops afe4404_trigger_ops = { static const struct reg_sequence afe4404_reg_sequences[] = { AFE4404_TIMING_PAIRS, { AFE440X_CONTROL1, AFE440X_CONTROL1_TIMEREN }, - { AFE4404_TIA_GAIN, AFE4404_TIA_GAIN_RES_50_K }, - { AFE440X_LEDCNTRL, (0xf << AFE4404_LEDCNTRL_ILED1_SHIFT) | - (0x3 << AFE4404_LEDCNTRL_ILED2_SHIFT) | - (0x3 << AFE4404_LEDCNTRL_ILED3_SHIFT) }, - { AFE440X_CONTROL2, AFE440X_CONTROL3_OSC_ENABLE }, + { AFE4404_TIA_GAIN_SEP, AFE440X_TIAGAIN_ENSEPGAIN }, + { AFE440X_CONTROL2, AFE440X_CONTROL2_OSC_ENABLE }, }; static const struct regmap_range afe4404_yes_ranges[] = { @@ -469,13 +420,11 @@ static const struct regmap_config afe4404_regmap_config = { .volatile_table = &afe4404_volatile_table, }; -#ifdef CONFIG_OF static const struct of_device_id afe4404_of_match[] = { { .compatible = "ti,afe4404", }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, afe4404_of_match); -#endif static int __maybe_unused afe4404_suspend(struct device *dev) { @@ -525,7 +474,7 @@ static int afe4404_probe(struct i2c_client *client, { struct iio_dev *indio_dev; struct afe4404_data *afe; - int ret; + int i, ret; indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*afe)); if (!indio_dev) @@ -543,6 +492,15 @@ static int afe4404_probe(struct i2c_client *client, return PTR_ERR(afe->regmap); } + for (i = 0; i < F_MAX_FIELDS; i++) { + afe->fields[i] = devm_regmap_field_alloc(afe->dev, afe->regmap, + afe4404_reg_fields[i]); + if (IS_ERR(afe->fields[i])) { + dev_err(afe->dev, "Unable to allocate regmap fields\n"); + return PTR_ERR(afe->fields[i]); + } + } + afe->regulator = devm_regulator_get(afe->dev, "tx_sup"); if (IS_ERR(afe->regulator)) { dev_err(afe->dev, "Unable to get regulator\n"); @@ -665,7 +623,7 @@ MODULE_DEVICE_TABLE(i2c, afe4404_ids); static struct i2c_driver afe4404_i2c_driver = { .driver = { .name = AFE4404_DRIVER_NAME, - .of_match_table = of_match_ptr(afe4404_of_match), + .of_match_table = afe4404_of_match, .pm = &afe4404_pm_ops, }, .probe = afe4404_probe, @@ -675,5 +633,5 @@ static struct i2c_driver afe4404_i2c_driver = { module_i2c_driver(afe4404_i2c_driver); MODULE_AUTHOR("Andrew F. 
Davis "); -MODULE_DESCRIPTION("TI AFE4404 Heart Rate and Pulse Oximeter"); +MODULE_DESCRIPTION("TI AFE4404 Heart Rate Monitor and Pulse Oximeter AFE"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/health/afe440x.h b/drivers/iio/health/afe440x.h index c671ab78a23a..1a0f247043ca 100644 --- a/drivers/iio/health/afe440x.h +++ b/drivers/iio/health/afe440x.h @@ -71,8 +71,7 @@ #define AFE440X_CONTROL1_TIMEREN BIT(8) /* TIAGAIN register fields */ -#define AFE440X_TIAGAIN_ENSEPGAIN_MASK BIT(15) -#define AFE440X_TIAGAIN_ENSEPGAIN_SHIFT 15 +#define AFE440X_TIAGAIN_ENSEPGAIN BIT(15) /* CONTROL2 register fields */ #define AFE440X_CONTROL2_PDN_AFE BIT(0) @@ -89,22 +88,7 @@ #define AFE440X_CONTROL0_WRITE 0x0 #define AFE440X_CONTROL0_READ 0x1 -struct afe440x_reg_info { - unsigned int reg; - unsigned int offreg; - unsigned int shift; - unsigned int mask; -}; - -#define AFE440X_REG_INFO(_reg, _offreg, _sm) \ - { \ - .reg = _reg, \ - .offreg = _offreg, \ - .shift = _sm ## _SHIFT, \ - .mask = _sm ## _MASK, \ - } - -#define AFE440X_INTENSITY_CHAN(_index, _name, _mask) \ +#define AFE440X_INTENSITY_CHAN(_index, _mask) \ { \ .type = IIO_INTENSITY, \ .channel = _index, \ @@ -116,29 +100,23 @@ struct afe440x_reg_info { .storagebits = 32, \ .endianness = IIO_CPU, \ }, \ - .extend_name = _name, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ _mask, \ + .indexed = true, \ } -#define AFE440X_CURRENT_CHAN(_index, _name) \ +#define AFE440X_CURRENT_CHAN(_index) \ { \ .type = IIO_CURRENT, \ .channel = _index, \ .address = _index, \ - .scan_index = _index, \ - .extend_name = _name, \ + .scan_index = -1, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ BIT(IIO_CHAN_INFO_SCALE), \ + .indexed = true, \ .output = true, \ } -enum afe440x_reg_type { - SIMPLE, - RESISTANCE, - CAPACITANCE, -}; - struct afe440x_val_table { int integer; int fract; @@ -164,10 +142,7 @@ static DEVICE_ATTR_RO(_name) struct afe440x_attr { struct device_attribute dev_attr; - unsigned int reg; - unsigned int shift; - unsigned int mask; - enum afe440x_reg_type type; + unsigned int field; const struct afe440x_val_table *val_table; unsigned int table_size; }; @@ -175,17 +150,14 @@ struct afe440x_attr { #define to_afe440x_attr(_dev_attr) \ container_of(_dev_attr, struct afe440x_attr, dev_attr) -#define AFE440X_ATTR(_name, _reg, _field, _type, _table, _size) \ +#define AFE440X_ATTR(_name, _field, _table) \ struct afe440x_attr afe440x_attr_##_name = { \ .dev_attr = __ATTR(_name, (S_IRUGO | S_IWUSR), \ afe440x_show_register, \ afe440x_store_register), \ - .reg = _reg, \ - .shift = _field ## _SHIFT, \ - .mask = _field ## _MASK, \ - .type = _type, \ + .field = _field, \ .val_table = _table, \ - .table_size = _size, \ + .table_size = ARRAY_SIZE(_table), \ } #endif /* _AFE440X_H */ diff --git a/drivers/iio/humidity/am2315.c b/drivers/iio/humidity/am2315.c index 11535911a5c6..3e200f69e886 100644 --- a/drivers/iio/humidity/am2315.c +++ b/drivers/iio/humidity/am2315.c @@ -276,6 +276,7 @@ static const struct i2c_device_id am2315_i2c_id[] = { {"am2315", 0}, {} }; +MODULE_DEVICE_TABLE(i2c, am2315_i2c_id); static const struct acpi_device_id am2315_acpi_id[] = { {"AOS2315", 0}, diff --git a/drivers/iio/humidity/htu21.c b/drivers/iio/humidity/htu21.c index 11cbc38b450f..0fbbd8c40894 100644 --- a/drivers/iio/humidity/htu21.c +++ b/drivers/iio/humidity/htu21.c @@ -236,6 +236,7 @@ static const struct i2c_device_id htu21_id[] = { {"ms8607-humidity", MS8607}, {} }; +MODULE_DEVICE_TABLE(i2c, htu21_id); static struct i2c_driver htu21_driver = { .probe = htu21_probe, diff 
--git a/drivers/iio/iio_core.h b/drivers/iio/iio_core.h index 359883525ab7..4c45488e3a7f 100644 --- a/drivers/iio/iio_core.h +++ b/drivers/iio/iio_core.h @@ -79,4 +79,7 @@ void iio_device_unregister_eventset(struct iio_dev *indio_dev); void iio_device_wakeup_eventset(struct iio_dev *indio_dev); int iio_event_getfd(struct iio_dev *indio_dev); +struct iio_event_interface; +bool iio_event_enabled(const struct iio_event_interface *ev_int); + #endif diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c index b8a290ec984e..e0251b8c1a52 100644 --- a/drivers/iio/imu/bmi160/bmi160_core.c +++ b/drivers/iio/imu/bmi160/bmi160_core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "bmi160.h" @@ -410,7 +411,8 @@ static irqreturn_t bmi160_trigger_handler(int irq, void *p) buf[j++] = sample; } - iio_push_to_buffers_with_timestamp(indio_dev, buf, iio_get_time_ns()); + iio_push_to_buffers_with_timestamp(indio_dev, buf, + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; @@ -466,10 +468,36 @@ static int bmi160_write_raw(struct iio_dev *indio_dev, return 0; } +static +IIO_CONST_ATTR(in_accel_sampling_frequency_available, + "0.78125 1.5625 3.125 6.25 12.5 25 50 100 200 400 800 1600"); +static +IIO_CONST_ATTR(in_anglvel_sampling_frequency_available, + "25 50 100 200 400 800 1600 3200"); +static +IIO_CONST_ATTR(in_accel_scale_available, + "0.000598 0.001197 0.002394 0.004788"); +static +IIO_CONST_ATTR(in_anglvel_scale_available, + "0.001065 0.000532 0.000266 0.000133 0.000066"); + +static struct attribute *bmi160_attrs[] = { + &iio_const_attr_in_accel_sampling_frequency_available.dev_attr.attr, + &iio_const_attr_in_anglvel_sampling_frequency_available.dev_attr.attr, + &iio_const_attr_in_accel_scale_available.dev_attr.attr, + &iio_const_attr_in_anglvel_scale_available.dev_attr.attr, + NULL, +}; + +static const struct attribute_group bmi160_attrs_group = { + .attrs = bmi160_attrs, +}; + static const struct iio_info bmi160_info = { .driver_module = THIS_MODULE, .read_raw = bmi160_read_raw, .write_raw = bmi160_write_raw, + .attrs = &bmi160_attrs_group, }; static const char *bmi160_match_acpi_device(struct device *dev) diff --git a/drivers/iio/imu/inv_mpu6050/Kconfig b/drivers/iio/imu/inv_mpu6050/Kconfig index f756feecfa4c..5483b2ea754d 100644 --- a/drivers/iio/imu/inv_mpu6050/Kconfig +++ b/drivers/iio/imu/inv_mpu6050/Kconfig @@ -13,8 +13,8 @@ config INV_MPU6050_I2C select INV_MPU6050_IIO select REGMAP_I2C help - This driver supports the Invensense MPU6050/6500/9150 motion tracking - devices over I2C. + This driver supports the Invensense MPU6050/6500/9150 and ICM20608 + motion tracking devices over I2C. This driver can be built as a module. The module will be called inv-mpu6050-i2c. @@ -24,7 +24,7 @@ config INV_MPU6050_SPI select INV_MPU6050_IIO select REGMAP_SPI help - This driver supports the Invensense MPU6000/6500/9150 motion tracking - devices over SPI. + This driver supports the Invensense MPU6050/6500/9150 and ICM20608 + motion tracking devices over SPI. This driver can be built as a module. The module will be called inv-mpu6050-spi. 
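
[Aside on the afe4403/afe4404 hunks above: they are mostly mechanical fallout from one idea, replacing the drivers' hand-rolled reg/mask/shift bookkeeping (struct afe440x_reg_info) with the kernel's regmap_field abstraction, which carries the same information in a struct reg_field and does the shifting and masking internally. A minimal sketch of the pattern follows, for reference only; the register name and bit range are hypothetical, not taken from the AFE440x datasheet.]

#include <linux/device.h>
#include <linux/err.h>
#include <linux/regmap.h>

/* Hypothetical control register with a gain field in bits [7:4]. */
#define HYPO_CTRL		0x02

static const struct reg_field hypo_gain_field = REG_FIELD(HYPO_CTRL, 4, 7);

static int hypo_set_gain(struct device *dev, struct regmap *regmap,
			 unsigned int gain)
{
	struct regmap_field *field;

	/* The field records reg/lsb/msb once, at allocation time... */
	field = devm_regmap_field_alloc(dev, regmap, hypo_gain_field);
	if (IS_ERR(field))
		return PTR_ERR(field);

	/* ...so writes need no manual "(val << shift) & mask" arithmetic. */
	return regmap_field_write(field, gain);
}

[The drivers above allocate all their fields once at probe time into afe->fields[F_MAX_FIELDS] and index them by enum, which is why their sysfs attribute and read_raw/write_raw paths shrink to single regmap_field_read()/regmap_field_write() calls.]
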
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c index f62b8bd9ad7e..dd6fc6d21f9d 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c @@ -56,6 +56,7 @@ static int asus_acpi_get_sensor_info(struct acpi_device *adev, int i; acpi_status status; union acpi_object *cpm; + int ret; status = acpi_evaluate_object(adev->handle, "CNF0", NULL, &buffer); if (ACPI_FAILURE(status)) @@ -82,10 +83,10 @@ static int asus_acpi_get_sensor_info(struct acpi_device *adev, } } } - + ret = cpm->package.count; kfree(buffer.pointer); - return cpm->package.count; + return ret; } static int acpi_i2c_check_resource(struct acpi_resource *ares, void *data) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index ee40dae5ab58..b9fcbf18aa99 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -113,6 +113,12 @@ static const struct inv_mpu6050_hw hw_info[] = { .reg = &reg_set_6050, .config = &chip_config_6050, }, + { + .whoami = INV_ICM20608_WHOAMI_VALUE, + .name = "ICM20608", + .reg = &reg_set_6500, + .config = &chip_config_6050, + }, }; int inv_mpu6050_switch_engine(struct inv_mpu6050_state *st, bool en, u32 mask) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c index e1fd7fa53e3b..19580d1db597 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c @@ -170,6 +170,7 @@ static const struct i2c_device_id inv_mpu_id[] = { {"mpu6050", INV_MPU6050}, {"mpu6500", INV_MPU6500}, {"mpu9150", INV_MPU9150}, + {"icm20608", INV_ICM20608}, {} }; diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h index 3bf8544ccc9f..f0e8c5dd9fae 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h @@ -70,6 +70,7 @@ enum inv_devices { INV_MPU6500, INV_MPU6000, INV_MPU9150, + INV_ICM20608, INV_NUM_PARTS }; @@ -225,6 +226,7 @@ struct inv_mpu6050_state { #define INV_MPU6050_WHOAMI_VALUE 0x68 #define INV_MPU6500_WHOAMI_VALUE 0x70 #define INV_MPU9150_WHOAMI_VALUE 0x68 +#define INV_ICM20608_WHOAMI_VALUE 0xAF /* scan element definition */ enum inv_mpu6050_scan { diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c index d0700628ee6d..3a9f3eac91ab 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c @@ -107,7 +107,7 @@ irqreturn_t inv_mpu6050_irq_handler(int irq, void *p) struct inv_mpu6050_state *st = iio_priv(indio_dev); s64 timestamp; - timestamp = iio_get_time_ns(); + timestamp = iio_get_time_ns(indio_dev); kfifo_in_spinlocked(&st->timestamps, &timestamp, 1, &st->time_stamp_lock); diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c index 190a4a51c830..6e6476dfa188 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c @@ -82,6 +82,7 @@ static const struct spi_device_id inv_mpu_id[] = { {"mpu6000", INV_MPU6000}, {"mpu6500", INV_MPU6500}, {"mpu9150", INV_MPU9150}, + {"icm20608", INV_ICM20608}, {} }; diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index e6319a9346b2..f914d5d140e4 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -80,6 +80,7 @@ static const char * const iio_chan_type_name_spec[] = { [IIO_RESISTANCE] = 
"resistance", [IIO_PH] = "ph", [IIO_UVINDEX] = "uvindex", + [IIO_ELECTRICALCONDUCTIVITY] = "electricalconductivity", }; static const char * const iio_modifier_names[] = { @@ -177,6 +178,86 @@ ssize_t iio_read_const_attr(struct device *dev, } EXPORT_SYMBOL(iio_read_const_attr); +static int iio_device_set_clock(struct iio_dev *indio_dev, clockid_t clock_id) +{ + int ret; + const struct iio_event_interface *ev_int = indio_dev->event_interface; + + ret = mutex_lock_interruptible(&indio_dev->mlock); + if (ret) + return ret; + if ((ev_int && iio_event_enabled(ev_int)) || + iio_buffer_enabled(indio_dev)) { + mutex_unlock(&indio_dev->mlock); + return -EBUSY; + } + indio_dev->clock_id = clock_id; + mutex_unlock(&indio_dev->mlock); + + return 0; +} + +/** + * iio_get_time_ns() - utility function to get a time stamp for events etc + * @indio_dev: device + */ +s64 iio_get_time_ns(const struct iio_dev *indio_dev) +{ + struct timespec tp; + + switch (iio_device_get_clock(indio_dev)) { + case CLOCK_REALTIME: + ktime_get_real_ts(&tp); + break; + case CLOCK_MONOTONIC: + ktime_get_ts(&tp); + break; + case CLOCK_MONOTONIC_RAW: + getrawmonotonic(&tp); + break; + case CLOCK_REALTIME_COARSE: + tp = current_kernel_time(); + break; + case CLOCK_MONOTONIC_COARSE: + tp = get_monotonic_coarse(); + break; + case CLOCK_BOOTTIME: + get_monotonic_boottime(&tp); + break; + case CLOCK_TAI: + timekeeping_clocktai(&tp); + break; + default: + BUG(); + } + + return timespec_to_ns(&tp); +} +EXPORT_SYMBOL(iio_get_time_ns); + +/** + * iio_get_time_res() - utility function to get time stamp clock resolution in + * nano seconds. + * @indio_dev: device + */ +unsigned int iio_get_time_res(const struct iio_dev *indio_dev) +{ + switch (iio_device_get_clock(indio_dev)) { + case CLOCK_REALTIME: + case CLOCK_MONOTONIC: + case CLOCK_MONOTONIC_RAW: + case CLOCK_BOOTTIME: + case CLOCK_TAI: + return hrtimer_resolution; + case CLOCK_REALTIME_COARSE: + case CLOCK_MONOTONIC_COARSE: + return LOW_RES_NSEC; + default: + BUG(); + } +} +EXPORT_SYMBOL(iio_get_time_res); + static int __init iio_init(void) { int ret; @@ -989,11 +1070,91 @@ static ssize_t iio_show_dev_name(struct device *dev, static DEVICE_ATTR(name, S_IRUGO, iio_show_dev_name, NULL); +static ssize_t iio_show_timestamp_clock(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + const struct iio_dev *indio_dev = dev_to_iio_dev(dev); + const clockid_t clk = iio_device_get_clock(indio_dev); + const char *name; + ssize_t sz; + + switch (clk) { + case CLOCK_REALTIME: + name = "realtime\n"; + sz = sizeof("realtime\n"); + break; + case CLOCK_MONOTONIC: + name = "monotonic\n"; + sz = sizeof("monotonic\n"); + break; + case CLOCK_MONOTONIC_RAW: + name = "monotonic_raw\n"; + sz = sizeof("monotonic_raw\n"); + break; + case CLOCK_REALTIME_COARSE: + name = "realtime_coarse\n"; + sz = sizeof("realtime_coarse\n"); + break; + case CLOCK_MONOTONIC_COARSE: + name = "monotonic_coarse\n"; + sz = sizeof("monotonic_coarse\n"); + break; + case CLOCK_BOOTTIME: + name = "boottime\n"; + sz = sizeof("boottime\n"); + break; + case CLOCK_TAI: + name = "tai\n"; + sz = sizeof("tai\n"); + break; + default: + BUG(); + } + + memcpy(buf, name, sz); + return sz; +} + +static ssize_t iio_store_timestamp_clock(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + clockid_t clk; + int ret; + + if (sysfs_streq(buf, "realtime")) + clk = CLOCK_REALTIME; + else if (sysfs_streq(buf, "monotonic")) + clk = CLOCK_MONOTONIC; + else if (sysfs_streq(buf, "monotonic_raw")) + clk = 
CLOCK_MONOTONIC_RAW; + else if (sysfs_streq(buf, "realtime_coarse")) + clk = CLOCK_REALTIME_COARSE; + else if (sysfs_streq(buf, "monotonic_coarse")) + clk = CLOCK_MONOTONIC_COARSE; + else if (sysfs_streq(buf, "boottime")) + clk = CLOCK_BOOTTIME; + else if (sysfs_streq(buf, "tai")) + clk = CLOCK_TAI; + else + return -EINVAL; + + ret = iio_device_set_clock(dev_to_iio_dev(dev), clk); + if (ret) + return ret; + + return len; +} + +static DEVICE_ATTR(current_timestamp_clock, S_IRUGO | S_IWUSR, + iio_show_timestamp_clock, iio_store_timestamp_clock); + static int iio_device_register_sysfs(struct iio_dev *indio_dev) { int i, ret = 0, attrcount, attrn, attrcount_orig = 0; struct iio_dev_attr *p; - struct attribute **attr; + struct attribute **attr, *clk = NULL; /* First count elements in any existing group */ if (indio_dev->info->attrs) { @@ -1008,16 +1169,25 @@ static int iio_device_register_sysfs(struct iio_dev *indio_dev) */ if (indio_dev->channels) for (i = 0; i < indio_dev->num_channels; i++) { - ret = iio_device_add_channel_sysfs(indio_dev, - &indio_dev - ->channels[i]); + const struct iio_chan_spec *chan = + &indio_dev->channels[i]; + + if (chan->type == IIO_TIMESTAMP) + clk = &dev_attr_current_timestamp_clock.attr; + + ret = iio_device_add_channel_sysfs(indio_dev, chan); if (ret < 0) goto error_clear_attrs; attrcount += ret; } + if (indio_dev->event_interface) + clk = &dev_attr_current_timestamp_clock.attr; + if (indio_dev->name) attrcount++; + if (clk) + attrcount++; indio_dev->chan_attr_group.attrs = kcalloc(attrcount + 1, sizeof(indio_dev->chan_attr_group.attrs[0]), @@ -1038,6 +1208,8 @@ static int iio_device_register_sysfs(struct iio_dev *indio_dev) indio_dev->chan_attr_group.attrs[attrn++] = &p->dev_attr.attr; if (indio_dev->name) indio_dev->chan_attr_group.attrs[attrn++] = &dev_attr_name.attr; + if (clk) + indio_dev->chan_attr_group.attrs[attrn++] = clk; indio_dev->groups[indio_dev->groupcounter++] = &indio_dev->chan_attr_group; diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index cae332b1d7ea..0ebfc923a997 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -44,6 +44,11 @@ struct iio_event_interface { struct mutex read_lock; }; +bool iio_event_enabled(const struct iio_event_interface *ev_int) +{ + return !!test_bit(IIO_BUSY_BIT_POS, &ev_int->flags); +} + /** * iio_push_event() - try to add event to the list for userspace reading * @indio_dev: IIO device structure @@ -60,7 +65,7 @@ int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp) int copied; /* Does anyone care? 
*/ - if (test_bit(IIO_BUSY_BIT_POS, &ev_int->flags)) { + if (iio_event_enabled(ev_int)) { ev.id = ev_code; ev.timestamp = timestamp; @@ -180,8 +185,14 @@ int iio_event_getfd(struct iio_dev *indio_dev) if (ev_int == NULL) return -ENODEV; - if (test_and_set_bit(IIO_BUSY_BIT_POS, &ev_int->flags)) - return -EBUSY; + fd = mutex_lock_interruptible(&indio_dev->mlock); + if (fd) + return fd; + + if (test_and_set_bit(IIO_BUSY_BIT_POS, &ev_int->flags)) { + fd = -EBUSY; + goto unlock; + } iio_device_get(indio_dev); @@ -194,6 +205,8 @@ int iio_event_getfd(struct iio_dev *indio_dev) kfifo_reset_out(&ev_int->det_events); } +unlock: + mutex_unlock(&indio_dev->mlock); return fd; } diff --git a/drivers/iio/industrialio-sw-device.c b/drivers/iio/industrialio-sw-device.c new file mode 100644 index 000000000000..81b49cfca452 --- /dev/null +++ b/drivers/iio/industrialio-sw-device.c @@ -0,0 +1,182 @@ +/* + * The Industrial I/O core, software IIO devices functions + * + * Copyright (c) 2016 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +static struct config_group *iio_devices_group; +static struct config_item_type iio_device_type_group_type; + +static struct config_item_type iio_devices_group_type = { + .ct_owner = THIS_MODULE, +}; + +static LIST_HEAD(iio_device_types_list); +static DEFINE_MUTEX(iio_device_types_lock); + +static +struct iio_sw_device_type *__iio_find_sw_device_type(const char *name, + unsigned len) +{ + struct iio_sw_device_type *d = NULL, *iter; + + list_for_each_entry(iter, &iio_device_types_list, list) + if (!strcmp(iter->name, name)) { + d = iter; + break; + } + + return d; +} + +int iio_register_sw_device_type(struct iio_sw_device_type *d) +{ + struct iio_sw_device_type *iter; + int ret = 0; + + mutex_lock(&iio_device_types_lock); + iter = __iio_find_sw_device_type(d->name, strlen(d->name)); + if (iter) + ret = -EBUSY; + else + list_add_tail(&d->list, &iio_device_types_list); + mutex_unlock(&iio_device_types_lock); + + if (ret) + return ret; + + d->group = configfs_register_default_group(iio_devices_group, d->name, + &iio_device_type_group_type); + if (IS_ERR(d->group)) + ret = PTR_ERR(d->group); + + return ret; +} +EXPORT_SYMBOL(iio_register_sw_device_type); + +void iio_unregister_sw_device_type(struct iio_sw_device_type *dt) +{ + struct iio_sw_device_type *iter; + + mutex_lock(&iio_device_types_lock); + iter = __iio_find_sw_device_type(dt->name, strlen(dt->name)); + if (iter) + list_del(&dt->list); + mutex_unlock(&iio_device_types_lock); + + configfs_unregister_default_group(dt->group); +} +EXPORT_SYMBOL(iio_unregister_sw_device_type); + +static +struct iio_sw_device_type *iio_get_sw_device_type(const char *name) +{ + struct iio_sw_device_type *dt; + + mutex_lock(&iio_device_types_lock); + dt = __iio_find_sw_device_type(name, strlen(name)); + if (dt && !try_module_get(dt->owner)) + dt = NULL; + mutex_unlock(&iio_device_types_lock); + + return dt; +} + +struct iio_sw_device *iio_sw_device_create(const char *type, const char *name) +{ + struct iio_sw_device *d; + struct iio_sw_device_type *dt; + + dt = iio_get_sw_device_type(type); + if (!dt) { + pr_err("Invalid device type: %s\n", type); + return ERR_PTR(-EINVAL); + } + d = dt->ops->probe(name); + if (IS_ERR(d)) + goto out_module_put; + + d->device_type = dt; + + return d; 
+out_module_put: + module_put(dt->owner); + return d; +} +EXPORT_SYMBOL(iio_sw_device_create); + +void iio_sw_device_destroy(struct iio_sw_device *d) +{ + struct iio_sw_device_type *dt = d->device_type; + + dt->ops->remove(d); + module_put(dt->owner); +} +EXPORT_SYMBOL(iio_sw_device_destroy); + +static struct config_group *device_make_group(struct config_group *group, + const char *name) +{ + struct iio_sw_device *d; + + d = iio_sw_device_create(group->cg_item.ci_name, name); + if (IS_ERR(d)) + return ERR_CAST(d); + + config_item_set_name(&d->group.cg_item, "%s", name); + + return &d->group; +} + +static void device_drop_group(struct config_group *group, + struct config_item *item) +{ + struct iio_sw_device *d = to_iio_sw_device(item); + + iio_sw_device_destroy(d); + config_item_put(item); +} + +static struct configfs_group_operations device_ops = { + .make_group = &device_make_group, + .drop_item = &device_drop_group, +}; + +static struct config_item_type iio_device_type_group_type = { + .ct_group_ops = &device_ops, + .ct_owner = THIS_MODULE, +}; + +static int __init iio_sw_device_init(void) +{ + iio_devices_group = + configfs_register_default_group(&iio_configfs_subsys.su_group, + "devices", + &iio_devices_group_type); + return PTR_ERR_OR_ZERO(iio_devices_group); +} +module_init(iio_sw_device_init); + +static void __exit iio_sw_device_exit(void) +{ + configfs_unregister_default_group(iio_devices_group); +} +module_exit(iio_sw_device_exit); + +MODULE_AUTHOR("Daniel Baluta "); +MODULE_DESCRIPTION("Industrial I/O software devices support"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index 0c52dfe64977..7ad82fdd3e5b 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -64,10 +64,16 @@ static struct attribute *iio_trig_dev_attrs[] = { }; ATTRIBUTE_GROUPS(iio_trig_dev); +static struct iio_trigger *__iio_trigger_find_by_name(const char *name); + int iio_trigger_register(struct iio_trigger *trig_info) { int ret; + /* trig_info->ops is required for the module member */ + if (!trig_info->ops) + return -EINVAL; + trig_info->id = ida_simple_get(&iio_trigger_ida, 0, 0, GFP_KERNEL); if (trig_info->id < 0) return trig_info->id; @@ -82,11 +88,19 @@ int iio_trigger_register(struct iio_trigger *trig_info) /* Add to list of available triggers held by the IIO core */ mutex_lock(&iio_trigger_list_lock); + if (__iio_trigger_find_by_name(trig_info->name)) { + pr_err("Duplicate trigger name '%s'\n", trig_info->name); + ret = -EEXIST; + goto error_device_del; + } list_add_tail(&trig_info->list, &iio_trigger_list); mutex_unlock(&iio_trigger_list_lock); return 0; +error_device_del: + mutex_unlock(&iio_trigger_list_lock); + device_del(&trig_info->dev); error_unregister_id: ida_simple_remove(&iio_trigger_ida, trig_info->id); return ret; @@ -105,6 +119,18 @@ void iio_trigger_unregister(struct iio_trigger *trig_info) } EXPORT_SYMBOL(iio_trigger_unregister); +/* Search for trigger by name, assuming iio_trigger_list_lock held */ +static struct iio_trigger *__iio_trigger_find_by_name(const char *name) +{ + struct iio_trigger *iter; + + list_for_each_entry(iter, &iio_trigger_list, list) + if (!strcmp(iter->name, name)) + return iter; + + return NULL; +} + static struct iio_trigger *iio_trigger_find_by_name(const char *name, size_t len) { @@ -164,8 +190,7 @@ EXPORT_SYMBOL(iio_trigger_poll_chained); void iio_trigger_notify_done(struct iio_trigger *trig) { - if (atomic_dec_and_test(&trig->use_count) && trig->ops && - 
trig->ops->try_reenable) + if (atomic_dec_and_test(&trig->use_count) && trig->ops->try_reenable) if (trig->ops->try_reenable(trig)) /* Missed an interrupt so launch new poll now */ iio_trigger_poll(trig); @@ -224,7 +249,7 @@ static int iio_trigger_attach_poll_func(struct iio_trigger *trig, goto out_put_irq; /* Enable trigger in driver */ - if (trig->ops && trig->ops->set_trigger_state && notinuse) { + if (trig->ops->set_trigger_state && notinuse) { ret = trig->ops->set_trigger_state(trig, true); if (ret < 0) goto out_free_irq; @@ -249,7 +274,7 @@ static int iio_trigger_detach_poll_func(struct iio_trigger *trig, = (bitmap_weight(trig->pool, CONFIG_IIO_CONSUMERS_PER_TRIGGER) == 1); - if (trig->ops && trig->ops->set_trigger_state && no_other_users) { + if (trig->ops->set_trigger_state && no_other_users) { ret = trig->ops->set_trigger_state(trig, false); if (ret) return ret; @@ -264,7 +289,7 @@ static int iio_trigger_detach_poll_func(struct iio_trigger *trig, irqreturn_t iio_pollfunc_store_time(int irq, void *p) { struct iio_poll_func *pf = p; - pf->timestamp = iio_get_time_ns(); + pf->timestamp = iio_get_time_ns(pf->indio_dev); return IRQ_WAKE_THREAD; } EXPORT_SYMBOL(iio_pollfunc_store_time); @@ -371,7 +396,7 @@ static ssize_t iio_trigger_write_current(struct device *dev, return ret; } - if (trig && trig->ops && trig->ops->validate_device) { + if (trig && trig->ops->validate_device) { ret = trig->ops->validate_device(trig, indio_dev); if (ret) return ret; diff --git a/drivers/iio/light/acpi-als.c b/drivers/iio/light/acpi-als.c index 53201d99a16c..f0b47c501f4e 100644 --- a/drivers/iio/light/acpi-als.c +++ b/drivers/iio/light/acpi-als.c @@ -118,7 +118,7 @@ static void acpi_als_notify(struct acpi_device *device, u32 event) struct iio_dev *indio_dev = acpi_driver_data(device); struct acpi_als *als = iio_priv(indio_dev); s32 *buffer = als->evt_buffer; - s64 time_ns = iio_get_time_ns(); + s64 time_ns = iio_get_time_ns(indio_dev); s32 val; int ret; diff --git a/drivers/iio/light/adjd_s311.c b/drivers/iio/light/adjd_s311.c index 09ad5f1ce539..0113fc843a81 100644 --- a/drivers/iio/light/adjd_s311.c +++ b/drivers/iio/light/adjd_s311.c @@ -118,7 +118,7 @@ static irqreturn_t adjd_s311_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct adjd_s311_data *data = iio_priv(indio_dev); - s64 time_ns = iio_get_time_ns(); + s64 time_ns = iio_get_time_ns(indio_dev); int i, j = 0; int ret = adjd_s311_req_data(indio_dev); diff --git a/drivers/iio/light/apds9300.c b/drivers/iio/light/apds9300.c index e1b9fa5a7e91..649b26f67813 100644 --- a/drivers/iio/light/apds9300.c +++ b/drivers/iio/light/apds9300.c @@ -396,7 +396,7 @@ static irqreturn_t apds9300_interrupt_handler(int irq, void *private) IIO_UNMOD_EVENT_CODE(IIO_INTENSITY, 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_EITHER), - iio_get_time_ns()); + iio_get_time_ns(dev_info)); apds9300_clear_intr(data); diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c index 651d57b8abbf..a4304edc3e0f 100644 --- a/drivers/iio/light/apds9960.c +++ b/drivers/iio/light/apds9960.c @@ -807,7 +807,7 @@ static irqreturn_t apds9960_interrupt_handler(int irq, void *private) IIO_UNMOD_EVENT_CODE(IIO_INTENSITY, 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_EITHER), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); regmap_write(data->regmap, APDS9960_REG_CICLEAR, 1); } @@ -816,7 +816,7 @@ static irqreturn_t apds9960_interrupt_handler(int irq, void *private) IIO_UNMOD_EVENT_CODE(IIO_PROXIMITY, 0, IIO_EV_TYPE_THRESH, 
IIO_EV_DIR_EITHER), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); regmap_write(data->regmap, APDS9960_REG_PICLEAR, 1); } diff --git a/drivers/iio/light/cm36651.c b/drivers/iio/light/cm36651.c index c8d7b5ea7e78..9d66e89c57ef 100644 --- a/drivers/iio/light/cm36651.c +++ b/drivers/iio/light/cm36651.c @@ -268,7 +268,7 @@ static irqreturn_t cm36651_irq_handler(int irq, void *data) CM36651_CMD_READ_RAW_PROXIMITY, IIO_EV_TYPE_THRESH, ev_dir); - iio_push_event(indio_dev, ev_code, iio_get_time_ns()); + iio_push_event(indio_dev, ev_code, iio_get_time_ns(indio_dev)); return IRQ_HANDLED; } diff --git a/drivers/iio/light/gp2ap020a00f.c b/drivers/iio/light/gp2ap020a00f.c index 6d41086f7c64..6ada9149f142 100644 --- a/drivers/iio/light/gp2ap020a00f.c +++ b/drivers/iio/light/gp2ap020a00f.c @@ -851,7 +851,7 @@ static irqreturn_t gp2ap020a00f_prox_sensing_handler(int irq, void *data) GP2AP020A00F_SCAN_MODE_PROXIMITY, IIO_EV_TYPE_ROC, IIO_EV_DIR_RISING), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); } else { iio_push_event(indio_dev, IIO_UNMOD_EVENT_CODE( @@ -859,7 +859,7 @@ static irqreturn_t gp2ap020a00f_prox_sensing_handler(int irq, void *data) GP2AP020A00F_SCAN_MODE_PROXIMITY, IIO_EV_TYPE_ROC, IIO_EV_DIR_FALLING), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); } } @@ -925,7 +925,7 @@ static irqreturn_t gp2ap020a00f_thresh_event_handler(int irq, void *data) IIO_MOD_LIGHT_CLEAR, IIO_EV_TYPE_THRESH, IIO_EV_DIR_RISING), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); } if (test_bit(GP2AP020A00F_FLAG_ALS_FALLING_EV, &priv->flags)) { @@ -939,7 +939,7 @@ static irqreturn_t gp2ap020a00f_thresh_event_handler(int irq, void *data) IIO_MOD_LIGHT_CLEAR, IIO_EV_TYPE_THRESH, IIO_EV_DIR_FALLING), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); } } @@ -1287,22 +1287,14 @@ static int gp2ap020a00f_read_raw(struct iio_dev *indio_dev, struct gp2ap020a00f_data *data = iio_priv(indio_dev); int err = -EINVAL; - mutex_lock(&data->lock); - - switch (mask) { - case IIO_CHAN_INFO_RAW: - if (iio_buffer_enabled(indio_dev)) { - err = -EBUSY; - goto error_unlock; - } + if (mask == IIO_CHAN_INFO_RAW) { + err = iio_device_claim_direct_mode(indio_dev); + if (err) + return err; err = gp2ap020a00f_read_channel(data, chan, val); - break; + iio_device_release_direct_mode(indio_dev); } - -error_unlock: - mutex_unlock(&data->lock); - return err < 0 ? 
err : IIO_VAL_INT; } diff --git a/drivers/iio/light/isl29125.c b/drivers/iio/light/isl29125.c index e2945a20e5f6..1d2c0c8a1d4f 100644 --- a/drivers/iio/light/isl29125.c +++ b/drivers/iio/light/isl29125.c @@ -44,13 +44,15 @@ #define ISL29125_MODE_B 0x3 #define ISL29125_MODE_RGB 0x5 +#define ISL29125_SENSING_RANGE_0 5722 /* 375 lux full range */ +#define ISL29125_SENSING_RANGE_1 152590 /* 10k lux full range */ + #define ISL29125_MODE_RANGE BIT(3) #define ISL29125_STATUS_CONV BIT(1) struct isl29125_data { struct i2c_client *client; - struct mutex lock; u8 conf1; u16 buffer[8]; /* 3x 16-bit, padding, 8 bytes timestamp */ }; @@ -128,11 +130,11 @@ static int isl29125_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - if (iio_buffer_enabled(indio_dev)) - return -EBUSY; - mutex_lock(&data->lock); + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; ret = isl29125_read_data(data, chan->scan_index); - mutex_unlock(&data->lock); + iio_device_release_direct_mode(indio_dev); if (ret < 0) return ret; *val = ret; @@ -140,9 +142,9 @@ static int isl29125_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: *val = 0; if (data->conf1 & ISL29125_MODE_RANGE) - *val2 = 152590; /* 10k lux full range */ + *val2 = ISL29125_SENSING_RANGE_1; /*10k lux full range*/ else - *val2 = 5722; /* 375 lux full range */ + *val2 = ISL29125_SENSING_RANGE_0; /*375 lux full range*/ return IIO_VAL_INT_PLUS_MICRO; } return -EINVAL; @@ -158,9 +160,9 @@ static int isl29125_write_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: if (val != 0) return -EINVAL; - if (val2 == 152590) + if (val2 == ISL29125_SENSING_RANGE_1) data->conf1 |= ISL29125_MODE_RANGE; - else if (val2 == 5722) + else if (val2 == ISL29125_SENSING_RANGE_0) data->conf1 &= ~ISL29125_MODE_RANGE; else return -EINVAL; @@ -189,7 +191,7 @@ static irqreturn_t isl29125_trigger_handler(int irq, void *p) } iio_push_to_buffers_with_timestamp(indio_dev, data->buffer, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); @@ -259,7 +261,6 @@ static int isl29125_probe(struct i2c_client *client, data = iio_priv(indio_dev); i2c_set_clientdata(client, indio_dev); data->client = client; - mutex_init(&data->lock); indio_dev->dev.parent = &client->dev; indio_dev->info = &isl29125_info; diff --git a/drivers/iio/light/jsa1212.c b/drivers/iio/light/jsa1212.c index 99a62816c3b4..e8a8931b4f50 100644 --- a/drivers/iio/light/jsa1212.c +++ b/drivers/iio/light/jsa1212.c @@ -325,9 +325,6 @@ static int jsa1212_probe(struct i2c_client *client, struct regmap *regmap; int ret; - if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - return -EOPNOTSUPP; - indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data)); if (!indio_dev) return -ENOMEM; diff --git a/drivers/iio/light/lm3533-als.c b/drivers/iio/light/lm3533-als.c index e56937c40a18..f409c2047c05 100644 --- a/drivers/iio/light/lm3533-als.c +++ b/drivers/iio/light/lm3533-als.c @@ -267,7 +267,7 @@ static irqreturn_t lm3533_als_isr(int irq, void *dev_id) 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_EITHER), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); out: return IRQ_HANDLED; } diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c index 6bf89d8f3741..3afc53a3d0b6 100644 --- a/drivers/iio/light/ltr501.c +++ b/drivers/iio/light/ltr501.c @@ -1256,7 +1256,8 @@ static irqreturn_t ltr501_trigger_handler(int irq, void *p) buf[j++] = psdata & LTR501_PS_DATA_MASK; } - iio_push_to_buffers_with_timestamp(indio_dev, 
buf, iio_get_time_ns()); + iio_push_to_buffers_with_timestamp(indio_dev, buf, + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); @@ -1282,14 +1283,14 @@ static irqreturn_t ltr501_interrupt_handler(int irq, void *private) IIO_UNMOD_EVENT_CODE(IIO_INTENSITY, 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_EITHER), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); if (status & LTR501_STATUS_PS_INTR) iio_push_event(indio_dev, IIO_UNMOD_EVENT_CODE(IIO_PROXIMITY, 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_EITHER), - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); return IRQ_HANDLED; } diff --git a/drivers/iio/light/max44000.c b/drivers/iio/light/max44000.c index f17cb2ea18f5..6511b20a2a29 100644 --- a/drivers/iio/light/max44000.c +++ b/drivers/iio/light/max44000.c @@ -511,7 +511,8 @@ static irqreturn_t max44000_trigger_handler(int irq, void *p) } mutex_unlock(&data->lock); - iio_push_to_buffers_with_timestamp(indio_dev, buf, iio_get_time_ns()); + iio_push_to_buffers_with_timestamp(indio_dev, buf, + iio_get_time_ns(indio_dev)); iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; diff --git a/drivers/iio/light/opt3001.c b/drivers/iio/light/opt3001.c index b776c8ed4387..78c9b3a6453a 100644 --- a/drivers/iio/light/opt3001.c +++ b/drivers/iio/light/opt3001.c @@ -713,13 +713,13 @@ static irqreturn_t opt3001_irq(int irq, void *_iio) IIO_UNMOD_EVENT_CODE(IIO_LIGHT, 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_RISING), - iio_get_time_ns()); + iio_get_time_ns(iio)); if (ret & OPT3001_CONFIGURATION_FL) iio_push_event(iio, IIO_UNMOD_EVENT_CODE(IIO_LIGHT, 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_FALLING), - iio_get_time_ns()); + iio_get_time_ns(iio)); } else if (ret & OPT3001_CONFIGURATION_CRF) { ret = i2c_smbus_read_word_swapped(opt->client, OPT3001_RESULT); if (ret < 0) { diff --git a/drivers/iio/light/stk3310.c b/drivers/iio/light/stk3310.c index 9e847f8f4f0c..45cf8b0a4363 100644 --- a/drivers/iio/light/stk3310.c +++ b/drivers/iio/light/stk3310.c @@ -528,7 +528,7 @@ static irqreturn_t stk3310_irq_handler(int irq, void *private) struct iio_dev *indio_dev = private; struct stk3310_data *data = iio_priv(indio_dev); - data->timestamp = iio_get_time_ns(); + data->timestamp = iio_get_time_ns(indio_dev); return IRQ_WAKE_THREAD; } diff --git a/drivers/iio/light/tcs3414.c b/drivers/iio/light/tcs3414.c index f90f8c5919fe..a795afb7667b 100644 --- a/drivers/iio/light/tcs3414.c +++ b/drivers/iio/light/tcs3414.c @@ -53,7 +53,6 @@ struct tcs3414_data { struct i2c_client *client; - struct mutex lock; u8 control; u8 gain; u8 timing; @@ -134,16 +133,16 @@ static int tcs3414_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - if (iio_buffer_enabled(indio_dev)) - return -EBUSY; - mutex_lock(&data->lock); + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; ret = tcs3414_req_data(data); if (ret < 0) { - mutex_unlock(&data->lock); + iio_device_release_direct_mode(indio_dev); return ret; } ret = i2c_smbus_read_word_data(data->client, chan->address); - mutex_unlock(&data->lock); + iio_device_release_direct_mode(indio_dev); if (ret < 0) return ret; *val = ret; @@ -217,7 +216,7 @@ static irqreturn_t tcs3414_trigger_handler(int irq, void *p) } iio_push_to_buffers_with_timestamp(indio_dev, data->buffer, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); @@ -288,7 +287,6 @@ static int tcs3414_probe(struct i2c_client *client, data = iio_priv(indio_dev); i2c_set_clientdata(client, indio_dev); data->client = client; - 
mutex_init(&data->lock); indio_dev->dev.parent = &client->dev; indio_dev->info = &tcs3414_info; diff --git a/drivers/iio/light/tcs3472.c b/drivers/iio/light/tcs3472.c index 1b530bf04c89..3aa71e34ae28 100644 --- a/drivers/iio/light/tcs3472.c +++ b/drivers/iio/light/tcs3472.c @@ -52,7 +52,6 @@ struct tcs3472_data { struct i2c_client *client; - struct mutex lock; u8 enable; u8 control; u8 atime; @@ -117,17 +116,16 @@ static int tcs3472_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - if (iio_buffer_enabled(indio_dev)) - return -EBUSY; - - mutex_lock(&data->lock); + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; ret = tcs3472_req_data(data); if (ret < 0) { - mutex_unlock(&data->lock); + iio_device_release_direct_mode(indio_dev); return ret; } ret = i2c_smbus_read_word_data(data->client, chan->address); - mutex_unlock(&data->lock); + iio_device_release_direct_mode(indio_dev); if (ret < 0) return ret; *val = ret; @@ -204,7 +202,7 @@ static irqreturn_t tcs3472_trigger_handler(int irq, void *p) } iio_push_to_buffers_with_timestamp(indio_dev, data->buffer, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); @@ -263,7 +261,6 @@ static int tcs3472_probe(struct i2c_client *client, data = iio_priv(indio_dev); i2c_set_clientdata(client, indio_dev); data->client = client; - mutex_init(&data->lock); indio_dev->dev.parent = &client->dev; indio_dev->info = &tcs3472_info; diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c index 57b108c30e98..04598ae993d4 100644 --- a/drivers/iio/light/tsl2563.c +++ b/drivers/iio/light/tsl2563.c @@ -630,7 +630,7 @@ static irqreturn_t tsl2563_event_handler(int irq, void *private) 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_EITHER), - iio_get_time_ns()); + iio_get_time_ns(dev_info)); /* clear the interrupt and push the event */ i2c_smbus_write_byte(chip->client, TSL2563_CMD | TSL2563_CLEARINT); diff --git a/drivers/iio/light/us5182d.c b/drivers/iio/light/us5182d.c index 45bc2f742f46..20c40f780964 100644 --- a/drivers/iio/light/us5182d.c +++ b/drivers/iio/light/us5182d.c @@ -833,7 +833,7 @@ static irqreturn_t us5182d_irq_thread_handler(int irq, void *private) dir = ret & US5182D_CFG0_PROX ? IIO_EV_DIR_RISING : IIO_EV_DIR_FALLING; ev = IIO_UNMOD_EVENT_CODE(IIO_PROXIMITY, 1, IIO_EV_TYPE_THRESH, dir); - iio_push_event(indio_dev, ev, iio_get_time_ns()); + iio_push_event(indio_dev, ev, iio_get_time_ns(indio_dev)); ret = i2c_smbus_write_byte_data(data->client, US5182D_REG_CFG0, ret & ~US5182D_CFG0_PX_IRQ); diff --git a/drivers/iio/magnetometer/Kconfig b/drivers/iio/magnetometer/Kconfig index 84e6559ccc65..1f842abcb4a4 100644 --- a/drivers/iio/magnetometer/Kconfig +++ b/drivers/iio/magnetometer/Kconfig @@ -44,6 +44,7 @@ config BMC150_MAGN_I2C This driver is only implementing magnetometer part, which has its own address and register map. + This driver also supports I2C Bosch BMC156 and BMM150 chips. To compile this driver as a module, choose M here: the module will be called bmc150_magn_i2c. @@ -60,6 +61,7 @@ config BMC150_MAGN_SPI This driver is only implementing magnetometer part, which has its own address and register map. + This driver also supports SPI Bosch BMC156 and BMM150 chips. To compile this driver as a module, choose M here: the module will be called bmc150_magn_spi. 
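The light-sensor conversions above all follow the same two-step recipe: the driver-private mutex guarding raw reads is dropped in favour of the IIO core's iio_device_claim_direct_mode()/iio_device_release_direct_mode() helpers, and iio_get_time_ns() gains the iio_dev argument so timestamps follow the per-device clock selection added this cycle. A minimal sketch of the resulting read_raw() shape; mydev_read_data() and struct mydev_data are illustrative placeholders, not code from this pull:

/*
 * Sketch of the claim/release idiom used by the hunks above.
 * Claiming direct mode fails with -EBUSY while a buffer is
 * enabled, replacing the old iio_buffer_enabled() check.
 */
static int mydev_read_raw(struct iio_dev *indio_dev,
			  struct iio_chan_spec const *chan,
			  int *val, int *val2, long mask)
{
	struct mydev_data *data = iio_priv(indio_dev);
	int ret;

	switch (mask) {
	case IIO_CHAN_INFO_RAW:
		ret = iio_device_claim_direct_mode(indio_dev);
		if (ret)
			return ret;
		ret = mydev_read_data(data, chan->scan_index);
		iio_device_release_direct_mode(indio_dev);
		if (ret < 0)
			return ret;
		*val = ret;
		return IIO_VAL_INT;
	default:
		return -EINVAL;
	}
}

Note that the release call is unconditional once the claim succeeds, which is why the drivers above release before checking the read result.
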
diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c
index 609a2c401b5d..af8606cc7812 100644
--- a/drivers/iio/magnetometer/ak8975.c
+++ b/drivers/iio/magnetometer/ak8975.c
@@ -33,6 +33,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -379,37 +380,40 @@ struct ak8975_data {
 	u8 cntl_cache;
 	struct iio_mount_matrix orientation;
 	struct regulator *vdd;
+	struct regulator *vid;
 };
 
 /* Enable attached power regulator if any. */
-static int ak8975_power_on(struct i2c_client *client)
+static int ak8975_power_on(const struct ak8975_data *data)
 {
-	const struct iio_dev *indio_dev = i2c_get_clientdata(client);
-	struct ak8975_data *data = iio_priv(indio_dev);
 	int ret;
 
-	data->vdd = devm_regulator_get(&client->dev, "vdd");
-	if (IS_ERR_OR_NULL(data->vdd)) {
-		ret = PTR_ERR(data->vdd);
-		if (ret == -ENODEV)
-			ret = 0;
-	} else {
-		ret = regulator_enable(data->vdd);
+	ret = regulator_enable(data->vdd);
+	if (ret) {
+		dev_warn(&data->client->dev,
+			 "Failed to enable specified Vdd supply\n");
+		return ret;
 	}
-
-	if (ret)
-		dev_err(&client->dev, "failed to enable Vdd supply: %d\n", ret);
-	return ret;
+	ret = regulator_enable(data->vid);
+	if (ret) {
+		dev_warn(&data->client->dev,
+			 "Failed to enable specified Vid supply\n");
+		return ret;
+	}
+	/*
+	 * According to the datasheet the power supply rise time is 200us
+	 * and the minimum wait time before mode setting is 100us, in
+	 * total 300 us. Add some margin and say minimum 500us here.
+	 */
+	usleep_range(500, 1000);
+	return 0;
 }
 
 /* Disable attached power regulator if any. */
-static void ak8975_power_off(const struct i2c_client *client)
+static void ak8975_power_off(const struct ak8975_data *data)
 {
-	const struct iio_dev *indio_dev = i2c_get_clientdata(client);
-	const struct ak8975_data *data = iio_priv(indio_dev);
-
-	if (!IS_ERR_OR_NULL(data->vdd))
-		regulator_disable(data->vdd);
+	regulator_disable(data->vid);
+	regulator_disable(data->vdd);
 }
 
 /*
@@ -430,8 +434,8 @@ static int ak8975_who_i_am(struct i2c_client *client,
 	 * AK8975 | DEVICE_ID | NA
 	 * AK8963 | DEVICE_ID | NA
 	 */
-	ret = i2c_smbus_read_i2c_block_data(client, AK09912_REG_WIA1,
-					    2, wia_val);
+	ret = i2c_smbus_read_i2c_block_data_or_emulated(
+			client, AK09912_REG_WIA1, 2, wia_val);
 	if (ret < 0) {
 		dev_err(&client->dev, "Error reading WIA\n");
 		return ret;
@@ -543,9 +547,9 @@ static int ak8975_setup(struct i2c_client *client)
 	}
 
 	/* Get asa data and store in the device data. */
-	ret = i2c_smbus_read_i2c_block_data(client,
-					    data->def->ctrl_regs[ASA_BASE],
-					    3, data->asa);
+	ret = i2c_smbus_read_i2c_block_data_or_emulated(
+			client, data->def->ctrl_regs[ASA_BASE],
+			3, data->asa);
 	if (ret < 0) {
 		dev_err(&client->dev, "Not able to read asa data\n");
 		return ret;
@@ -686,22 +690,31 @@ static int ak8975_read_axis(struct iio_dev *indio_dev, int index, int *val)
 	struct ak8975_data *data = iio_priv(indio_dev);
 	const struct i2c_client *client = data->client;
 	const struct ak_def *def = data->def;
+	u16 buff;
 	int ret;
 
+	pm_runtime_get_sync(&data->client->dev);
+
 	mutex_lock(&data->lock);
 
 	ret = ak8975_start_read_axis(data, client);
 	if (ret)
		goto exit;
 
-	ret = i2c_smbus_read_word_data(client, def->data_regs[index]);
+	ret = i2c_smbus_read_i2c_block_data_or_emulated(
+			client, def->data_regs[index],
+			sizeof(buff), (u8*)&buff);
 	if (ret < 0)
 		goto exit;
 
 	mutex_unlock(&data->lock);
 
-	/* Clamp to valid range.
*/ - *val = clamp_t(s16, ret, -def->range, def->range); + pm_runtime_mark_last_busy(&data->client->dev); + pm_runtime_put_autosuspend(&data->client->dev); + + /* Swap bytes and convert to valid range. */ + buff = le16_to_cpu(buff); + *val = clamp_t(s16, buff, -def->range, def->range); return IIO_VAL_INT; exit: @@ -825,7 +838,8 @@ static void ak8975_fill_buffer(struct iio_dev *indio_dev) buff[1] = clamp_t(s16, le16_to_cpu(buff[1]), -def->range, def->range); buff[2] = clamp_t(s16, le16_to_cpu(buff[2]), -def->range, def->range); - iio_push_to_buffers_with_timestamp(indio_dev, buff, iio_get_time_ns()); + iio_push_to_buffers_with_timestamp(indio_dev, buff, + iio_get_time_ns(indio_dev)); return; unlock: @@ -919,7 +933,15 @@ static int ak8975_probe(struct i2c_client *client, data->def = &ak_def_array[chipset]; - err = ak8975_power_on(client); + /* Fetch the regulators */ + data->vdd = devm_regulator_get(&client->dev, "vdd"); + if (IS_ERR(data->vdd)) + return PTR_ERR(data->vdd); + data->vid = devm_regulator_get(&client->dev, "vid"); + if (IS_ERR(data->vid)) + return PTR_ERR(data->vid); + + err = ak8975_power_on(data); if (err) return err; @@ -959,26 +981,93 @@ static int ak8975_probe(struct i2c_client *client, goto cleanup_buffer; } + /* Enable runtime PM */ + pm_runtime_get_noresume(&client->dev); + pm_runtime_set_active(&client->dev); + pm_runtime_enable(&client->dev); + /* + * The device comes online in 500us, so add two orders of magnitude + * of delay before autosuspending: 50 ms. + */ + pm_runtime_set_autosuspend_delay(&client->dev, 50); + pm_runtime_use_autosuspend(&client->dev); + pm_runtime_put(&client->dev); + return 0; cleanup_buffer: iio_triggered_buffer_cleanup(indio_dev); power_off: - ak8975_power_off(client); + ak8975_power_off(data); return err; } static int ak8975_remove(struct i2c_client *client) { struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct ak8975_data *data = iio_priv(indio_dev); + pm_runtime_get_sync(&client->dev); + pm_runtime_put_noidle(&client->dev); + pm_runtime_disable(&client->dev); iio_device_unregister(indio_dev); iio_triggered_buffer_cleanup(indio_dev); - ak8975_power_off(client); + ak8975_set_mode(data, POWER_DOWN); + ak8975_power_off(data); return 0; } +#ifdef CONFIG_PM +static int ak8975_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct ak8975_data *data = iio_priv(indio_dev); + int ret; + + /* Set the device in power down if it wasn't already */ + ret = ak8975_set_mode(data, POWER_DOWN); + if (ret < 0) { + dev_err(&client->dev, "Error in setting power-down mode\n"); + return ret; + } + /* Next cut the regulators */ + ak8975_power_off(data); + + return 0; +} + +static int ak8975_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct ak8975_data *data = iio_priv(indio_dev); + int ret; + + /* Take up the regulators */ + ak8975_power_on(data); + /* + * We come up in powered down mode, the reading routines will + * put us in the mode to read values later. 
+ */ + ret = ak8975_set_mode(data, POWER_DOWN); + if (ret < 0) { + dev_err(&client->dev, "Error in setting power-down mode\n"); + return ret; + } + + return 0; +} +#endif /* CONFIG_PM */ + +static const struct dev_pm_ops ak8975_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(ak8975_runtime_suspend, + ak8975_runtime_resume, NULL) +}; + static const struct i2c_device_id ak8975_id[] = { {"ak8975", AK8975}, {"ak8963", AK8963}, @@ -1006,6 +1095,7 @@ MODULE_DEVICE_TABLE(of, ak8975_of_match); static struct i2c_driver ak8975_driver = { .driver = { .name = "ak8975", + .pm = &ak8975_dev_pm_ops, .of_match_table = of_match_ptr(ak8975_of_match), .acpi_match_table = ACPI_PTR(ak_acpi_match), }, diff --git a/drivers/iio/magnetometer/bmc150_magn_i2c.c b/drivers/iio/magnetometer/bmc150_magn_i2c.c index eddc7f0d0096..ee05722587aa 100644 --- a/drivers/iio/magnetometer/bmc150_magn_i2c.c +++ b/drivers/iio/magnetometer/bmc150_magn_i2c.c @@ -2,6 +2,7 @@ * 3-axis magnetometer driver supporting following I2C Bosch-Sensortec chips: * - BMC150 * - BMC156 + * - BMM150 * * Copyright (c) 2016, Intel Corporation. * @@ -49,6 +50,7 @@ static int bmc150_magn_i2c_remove(struct i2c_client *client) static const struct acpi_device_id bmc150_magn_acpi_match[] = { {"BMC150B", 0}, {"BMC156B", 0}, + {"BMM150B", 0}, {}, }; MODULE_DEVICE_TABLE(acpi, bmc150_magn_acpi_match); @@ -56,6 +58,7 @@ MODULE_DEVICE_TABLE(acpi, bmc150_magn_acpi_match); static const struct i2c_device_id bmc150_magn_i2c_id[] = { {"bmc150_magn", 0}, {"bmc156_magn", 0}, + {"bmm150_magn", 0}, {} }; MODULE_DEVICE_TABLE(i2c, bmc150_magn_i2c_id); diff --git a/drivers/iio/magnetometer/bmc150_magn_spi.c b/drivers/iio/magnetometer/bmc150_magn_spi.c index c4c738a07695..7d4152d4d01e 100644 --- a/drivers/iio/magnetometer/bmc150_magn_spi.c +++ b/drivers/iio/magnetometer/bmc150_magn_spi.c @@ -2,6 +2,7 @@ * 3-axis magnetometer driver support following SPI Bosch-Sensortec chips: * - BMC150 * - BMC156 + * - BMM150 * * Copyright (c) 2016, Intel Corporation. 
* @@ -41,6 +42,7 @@ static int bmc150_magn_spi_remove(struct spi_device *spi) static const struct spi_device_id bmc150_magn_spi_id[] = { {"bmc150_magn", 0}, {"bmc156_magn", 0}, + {"bmm150_magn", 0}, {} }; MODULE_DEVICE_TABLE(spi, bmc150_magn_spi_id); @@ -48,6 +50,7 @@ MODULE_DEVICE_TABLE(spi, bmc150_magn_spi_id); static const struct acpi_device_id bmc150_magn_acpi_match[] = { {"BMC150B", 0}, {"BMC156B", 0}, + {"BMM150B", 0}, {}, }; MODULE_DEVICE_TABLE(acpi, bmc150_magn_acpi_match); diff --git a/drivers/iio/magnetometer/hmc5843_core.c b/drivers/iio/magnetometer/hmc5843_core.c index 77882b466e0f..ba3e2a374ee5 100644 --- a/drivers/iio/magnetometer/hmc5843_core.c +++ b/drivers/iio/magnetometer/hmc5843_core.c @@ -451,7 +451,7 @@ static irqreturn_t hmc5843_trigger_handler(int irq, void *p) goto done; iio_push_to_buffers_with_timestamp(indio_dev, data->buffer, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); diff --git a/drivers/iio/magnetometer/mag3110.c b/drivers/iio/magnetometer/mag3110.c index 261d517428e4..f2be4a049056 100644 --- a/drivers/iio/magnetometer/mag3110.c +++ b/drivers/iio/magnetometer/mag3110.c @@ -261,7 +261,7 @@ static irqreturn_t mag3110_trigger_handler(int irq, void *p) } iio_push_to_buffers_with_timestamp(indio_dev, buffer, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index 8250fc322c56..3e1f06b2224c 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -589,13 +589,15 @@ int st_magn_common_probe(struct iio_dev *indio_dev) indio_dev->info = &magn_info; mutex_init(&mdata->tb.buf_lock); - st_sensors_power_enable(indio_dev); + err = st_sensors_power_enable(indio_dev); + if (err) + return err; err = st_sensors_check_device_support(indio_dev, ARRAY_SIZE(st_magn_sensors_settings), st_magn_sensors_settings); if (err < 0) - return err; + goto st_magn_power_off; mdata->num_data_channels = ST_MAGN_NUMBER_DATA_CHANNELS; mdata->multiread_bit = mdata->sensor_settings->multi_read_bit; @@ -608,11 +610,11 @@ int st_magn_common_probe(struct iio_dev *indio_dev) err = st_sensors_init_sensor(indio_dev, NULL); if (err < 0) - return err; + goto st_magn_power_off; err = st_magn_allocate_ring(indio_dev); if (err < 0) - return err; + goto st_magn_power_off; if (irq > 0) { err = st_sensors_allocate_trigger(indio_dev, @@ -635,6 +637,8 @@ st_magn_device_register_error: st_sensors_deallocate_trigger(indio_dev); st_magn_probe_trigger_error: st_magn_deallocate_ring(indio_dev); +st_magn_power_off: + st_sensors_power_disable(indio_dev); return err; } diff --git a/drivers/iio/potentiometer/Kconfig b/drivers/iio/potentiometer/Kconfig index 6acb23810bb4..2e9da1cf3297 100644 --- a/drivers/iio/potentiometer/Kconfig +++ b/drivers/iio/potentiometer/Kconfig @@ -10,11 +10,22 @@ config DS1803 depends on I2C help Say yes here to build support for the Maxim Integrated DS1803 - digital potentiomenter chip. + digital potentiometer chip. To compile this driver as a module, choose M here: the module will be called ds1803. +config MAX5487 + tristate "Maxim MAX5487/MAX5488/MAX5489 Digital Potentiometer driver" + depends on SPI + help + Say yes here to build support for the Maxim + MAX5487, MAX5488, MAX5489 digital potentiometer + chips. + + To compile this driver as a module, choose M here: the + module will be called max5487. 
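The st_magn_common_probe() hunk above makes st_sensors_power_enable() failures fatal and reroutes every later error through a label that powers the sensor back off, instead of returning with the regulator left on. Reduced to its essentials (foo_common_probe() and foo_setup() are placeholders standing in for the init, ring-allocation and trigger steps, not code from this pull):

/*
 * Unwind shape introduced by the st_magn hunk: any failure after
 * power-on must fall through the power-off label rather than return.
 */
static int foo_common_probe(struct iio_dev *indio_dev)
{
	int err;

	err = st_sensors_power_enable(indio_dev);
	if (err)
		return err;

	err = foo_setup(indio_dev);
	if (err < 0)
		goto power_off;

	return 0;

power_off:
	st_sensors_power_disable(indio_dev);
	return err;
}
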
+ config MCP4131 tristate "Microchip MCP413X/414X/415X/416X/423X/424X/425X/426X Digital Potentiometer driver" depends on SPI @@ -28,7 +39,7 @@ config MCP4131 MCP4241, MCP4242, MCP4251, MCP4252, MCP4261, MCP4262, - digital potentiomenter chips. + digital potentiometer chips. To compile this driver as a module, choose M here: the module will be called mcp4131. @@ -38,9 +49,11 @@ config MCP4531 depends on I2C help Say yes here to build support for the Microchip - MCP4531, MCP4532, MCP4551, MCP4552, - MCP4631, MCP4632, MCP4651, MCP4652 - digital potentiomenter chips. + MCP4531, MCP4532, MCP4541, MCP4542, + MCP4551, MCP4552, MCP4561, MCP4562, + MCP4631, MCP4632, MCP4641, MCP4642, + MCP4651, MCP4652, MCP4661, MCP4662 + digital potentiometer chips. To compile this driver as a module, choose M here: the module will be called mcp4531. diff --git a/drivers/iio/potentiometer/Makefile b/drivers/iio/potentiometer/Makefile index 6007faa2fb02..8adb58f38c0b 100644 --- a/drivers/iio/potentiometer/Makefile +++ b/drivers/iio/potentiometer/Makefile @@ -4,6 +4,7 @@ # When adding new entries keep the list in alphabetical order obj-$(CONFIG_DS1803) += ds1803.o +obj-$(CONFIG_MAX5487) += max5487.o obj-$(CONFIG_MCP4131) += mcp4131.o obj-$(CONFIG_MCP4531) += mcp4531.o obj-$(CONFIG_TPL0102) += tpl0102.o diff --git a/drivers/iio/potentiometer/max5487.c b/drivers/iio/potentiometer/max5487.c new file mode 100644 index 000000000000..6c50939a2e83 --- /dev/null +++ b/drivers/iio/potentiometer/max5487.c @@ -0,0 +1,161 @@ +/* + * max5487.c - Support for MAX5487, MAX5488, MAX5489 digital potentiometers + * + * Copyright (C) 2016 Cristina-Gabriela Moraru + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ +#include +#include +#include + +#include +#include + +#define MAX5487_WRITE_WIPER_A (0x01 << 8) +#define MAX5487_WRITE_WIPER_B (0x02 << 8) + +/* copy both wiper regs to NV regs */ +#define MAX5487_COPY_AB_TO_NV (0x23 << 8) +/* copy both NV regs to wiper regs */ +#define MAX5487_COPY_NV_TO_AB (0x33 << 8) + +#define MAX5487_MAX_POS 255 + +struct max5487_data { + struct spi_device *spi; + int kohms; +}; + +#define MAX5487_CHANNEL(ch, addr) { \ + .type = IIO_RESISTANCE, \ + .indexed = 1, \ + .output = 1, \ + .channel = ch, \ + .address = addr, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ +} + +static const struct iio_chan_spec max5487_channels[] = { + MAX5487_CHANNEL(0, MAX5487_WRITE_WIPER_A), + MAX5487_CHANNEL(1, MAX5487_WRITE_WIPER_B), +}; + +static int max5487_write_cmd(struct spi_device *spi, u16 cmd) +{ + return spi_write(spi, (const void *) &cmd, sizeof(u16)); +} + +static int max5487_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + struct max5487_data *data = iio_priv(indio_dev); + + if (mask != IIO_CHAN_INFO_SCALE) + return -EINVAL; + + *val = 1000 * data->kohms; + *val2 = MAX5487_MAX_POS; + + return IIO_VAL_FRACTIONAL; +} + +static int max5487_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + struct max5487_data *data = iio_priv(indio_dev); + + if (mask != IIO_CHAN_INFO_RAW) + return -EINVAL; + + if (val < 0 || val > MAX5487_MAX_POS) + return -EINVAL; + + return max5487_write_cmd(data->spi, chan->address | val); +} + +static const struct iio_info max5487_info = { + .read_raw = max5487_read_raw, + .write_raw = max5487_write_raw, + .driver_module = THIS_MODULE, +}; + +static int max5487_spi_probe(struct spi_device *spi) +{ + struct iio_dev *indio_dev; + struct max5487_data *data; + const struct spi_device_id *id = spi_get_device_id(spi); + int ret; + + indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*data)); + if (!indio_dev) + return -ENOMEM; + + dev_set_drvdata(&spi->dev, indio_dev); + data = iio_priv(indio_dev); + + data->spi = spi; + data->kohms = id->driver_data; + + indio_dev->info = &max5487_info; + indio_dev->name = id->name; + indio_dev->dev.parent = &spi->dev; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->channels = max5487_channels; + indio_dev->num_channels = ARRAY_SIZE(max5487_channels); + + /* restore both wiper regs from NV regs */ + ret = max5487_write_cmd(data->spi, MAX5487_COPY_NV_TO_AB); + if (ret < 0) + return ret; + + return iio_device_register(indio_dev); +} + +static int max5487_spi_remove(struct spi_device *spi) +{ + struct iio_dev *indio_dev = dev_get_drvdata(&spi->dev); + + iio_device_unregister(indio_dev); + + /* save both wiper regs to NV regs */ + return max5487_write_cmd(spi, MAX5487_COPY_AB_TO_NV); +} + +static const struct spi_device_id max5487_id[] = { + { "MAX5487", 10 }, + { "MAX5488", 50 }, + { "MAX5489", 100 }, + { } +}; +MODULE_DEVICE_TABLE(spi, max5487_id); + +static const struct acpi_device_id max5487_acpi_match[] = { + { "MAX5487", 10 }, + { "MAX5488", 50 }, + { "MAX5489", 100 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, max5487_acpi_match); + +static struct spi_driver max5487_driver = { + .driver = { + .name = "max5487", + .owner = THIS_MODULE, + .acpi_match_table = ACPI_PTR(max5487_acpi_match), + }, + .id_table = max5487_id, + .probe = max5487_spi_probe, + .remove = max5487_spi_remove +}; +module_spi_driver(max5487_driver); + 
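Two details of the new max5487 driver are worth noting: the wiper register address lives in the high byte of the 16-bit SPI command, so chan->address | val forms the complete word, and the reported scale is 1000 * kohms / MAX5487_MAX_POS, i.e. ohms per wiper step via IIO_VAL_FRACTIONAL. A standalone arithmetic check, illustrative only and not driver code:

/* Illustrative check of the command layout and scale math above. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t wiper_a = 0x01 << 8;	/* MAX5487_WRITE_WIPER_A */
	uint16_t cmd = wiper_a | 128;	/* set wiper A to mid-scale */
	int kohms = 10;			/* the 10 kOhm MAX5487 variant */

	/* read_raw() reports SCALE as (1000 * kohms) / 255 */
	printf("cmd=0x%04x, scale=%.3f ohm/step\n",
	       cmd, (1000.0 * kohms) / 255);	/* 0x0180, 39.216 */
	return 0;
}
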
+MODULE_AUTHOR("Cristina-Gabriela Moraru "); +MODULE_DESCRIPTION("max5487 SPI driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/potentiometer/mcp4531.c b/drivers/iio/potentiometer/mcp4531.c index 3b72e1a595db..13b6ae2fcf7b 100644 --- a/drivers/iio/potentiometer/mcp4531.c +++ b/drivers/iio/potentiometer/mcp4531.c @@ -8,12 +8,20 @@ * DEVID #Wipers #Positions Resistor Opts (kOhm) i2c address * mcp4531 1 129 5, 10, 50, 100 010111x * mcp4532 1 129 5, 10, 50, 100 01011xx + * mcp4541 1 129 5, 10, 50, 100 010111x + * mcp4542 1 129 5, 10, 50, 100 01011xx * mcp4551 1 257 5, 10, 50, 100 010111x * mcp4552 1 257 5, 10, 50, 100 01011xx + * mcp4561 1 257 5, 10, 50, 100 010111x + * mcp4562 1 257 5, 10, 50, 100 01011xx * mcp4631 2 129 5, 10, 50, 100 0101xxx * mcp4632 2 129 5, 10, 50, 100 01011xx + * mcp4641 2 129 5, 10, 50, 100 0101xxx + * mcp4642 2 129 5, 10, 50, 100 01011xx * mcp4651 2 257 5, 10, 50, 100 0101xxx * mcp4652 2 257 5, 10, 50, 100 01011xx + * mcp4661 2 257 5, 10, 50, 100 0101xxx + * mcp4662 2 257 5, 10, 50, 100 01011xx * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published by @@ -23,6 +31,8 @@ #include #include #include +#include +#include #include @@ -37,18 +47,34 @@ enum mcp4531_type { MCP453x_103, MCP453x_503, MCP453x_104, + MCP454x_502, + MCP454x_103, + MCP454x_503, + MCP454x_104, MCP455x_502, MCP455x_103, MCP455x_503, MCP455x_104, + MCP456x_502, + MCP456x_103, + MCP456x_503, + MCP456x_104, MCP463x_502, MCP463x_103, MCP463x_503, MCP463x_104, + MCP464x_502, + MCP464x_103, + MCP464x_503, + MCP464x_104, MCP465x_502, MCP465x_103, MCP465x_503, MCP465x_104, + MCP466x_502, + MCP466x_103, + MCP466x_503, + MCP466x_104, }; static const struct mcp4531_cfg mcp4531_cfg[] = { @@ -56,18 +82,34 @@ static const struct mcp4531_cfg mcp4531_cfg[] = { [MCP453x_103] = { .wipers = 1, .max_pos = 128, .kohms = 10, }, [MCP453x_503] = { .wipers = 1, .max_pos = 128, .kohms = 50, }, [MCP453x_104] = { .wipers = 1, .max_pos = 128, .kohms = 100, }, + [MCP454x_502] = { .wipers = 1, .max_pos = 128, .kohms = 5, }, + [MCP454x_103] = { .wipers = 1, .max_pos = 128, .kohms = 10, }, + [MCP454x_503] = { .wipers = 1, .max_pos = 128, .kohms = 50, }, + [MCP454x_104] = { .wipers = 1, .max_pos = 128, .kohms = 100, }, [MCP455x_502] = { .wipers = 1, .max_pos = 256, .kohms = 5, }, [MCP455x_103] = { .wipers = 1, .max_pos = 256, .kohms = 10, }, [MCP455x_503] = { .wipers = 1, .max_pos = 256, .kohms = 50, }, [MCP455x_104] = { .wipers = 1, .max_pos = 256, .kohms = 100, }, + [MCP456x_502] = { .wipers = 1, .max_pos = 256, .kohms = 5, }, + [MCP456x_103] = { .wipers = 1, .max_pos = 256, .kohms = 10, }, + [MCP456x_503] = { .wipers = 1, .max_pos = 256, .kohms = 50, }, + [MCP456x_104] = { .wipers = 1, .max_pos = 256, .kohms = 100, }, [MCP463x_502] = { .wipers = 2, .max_pos = 128, .kohms = 5, }, [MCP463x_103] = { .wipers = 2, .max_pos = 128, .kohms = 10, }, [MCP463x_503] = { .wipers = 2, .max_pos = 128, .kohms = 50, }, [MCP463x_104] = { .wipers = 2, .max_pos = 128, .kohms = 100, }, + [MCP464x_502] = { .wipers = 2, .max_pos = 128, .kohms = 5, }, + [MCP464x_103] = { .wipers = 2, .max_pos = 128, .kohms = 10, }, + [MCP464x_503] = { .wipers = 2, .max_pos = 128, .kohms = 50, }, + [MCP464x_104] = { .wipers = 2, .max_pos = 128, .kohms = 100, }, [MCP465x_502] = { .wipers = 2, .max_pos = 256, .kohms = 5, }, [MCP465x_103] = { .wipers = 2, .max_pos = 256, .kohms = 10, }, [MCP465x_503] = { .wipers = 2, .max_pos = 256, .kohms = 50, }, [MCP465x_104] = { 
.wipers = 2, .max_pos = 256, .kohms = 100, }, + [MCP466x_502] = { .wipers = 2, .max_pos = 256, .kohms = 5, }, + [MCP466x_103] = { .wipers = 2, .max_pos = 256, .kohms = 10, }, + [MCP466x_503] = { .wipers = 2, .max_pos = 256, .kohms = 50, }, + [MCP466x_104] = { .wipers = 2, .max_pos = 256, .kohms = 100, }, }; #define MCP4531_WRITE (0 << 2) @@ -148,12 +190,89 @@ static const struct iio_info mcp4531_info = { .driver_module = THIS_MODULE, }; +#ifdef CONFIG_OF + +#define MCP4531_COMPATIBLE(of_compatible, cfg) { \ + .compatible = of_compatible, \ + .data = &mcp4531_cfg[cfg], \ +} + +static const struct of_device_id mcp4531_of_match[] = { + MCP4531_COMPATIBLE("microchip,mcp4531-502", MCP453x_502), + MCP4531_COMPATIBLE("microchip,mcp4531-103", MCP453x_103), + MCP4531_COMPATIBLE("microchip,mcp4531-503", MCP453x_503), + MCP4531_COMPATIBLE("microchip,mcp4531-104", MCP453x_104), + MCP4531_COMPATIBLE("microchip,mcp4532-502", MCP453x_502), + MCP4531_COMPATIBLE("microchip,mcp4532-103", MCP453x_103), + MCP4531_COMPATIBLE("microchip,mcp4532-503", MCP453x_503), + MCP4531_COMPATIBLE("microchip,mcp4532-104", MCP453x_104), + MCP4531_COMPATIBLE("microchip,mcp4541-502", MCP454x_502), + MCP4531_COMPATIBLE("microchip,mcp4541-103", MCP454x_103), + MCP4531_COMPATIBLE("microchip,mcp4541-503", MCP454x_503), + MCP4531_COMPATIBLE("microchip,mcp4541-104", MCP454x_104), + MCP4531_COMPATIBLE("microchip,mcp4542-502", MCP454x_502), + MCP4531_COMPATIBLE("microchip,mcp4542-103", MCP454x_103), + MCP4531_COMPATIBLE("microchip,mcp4542-503", MCP454x_503), + MCP4531_COMPATIBLE("microchip,mcp4542-104", MCP454x_104), + MCP4531_COMPATIBLE("microchip,mcp4551-502", MCP455x_502), + MCP4531_COMPATIBLE("microchip,mcp4551-103", MCP455x_103), + MCP4531_COMPATIBLE("microchip,mcp4551-503", MCP455x_503), + MCP4531_COMPATIBLE("microchip,mcp4551-104", MCP455x_104), + MCP4531_COMPATIBLE("microchip,mcp4552-502", MCP455x_502), + MCP4531_COMPATIBLE("microchip,mcp4552-103", MCP455x_103), + MCP4531_COMPATIBLE("microchip,mcp4552-503", MCP455x_503), + MCP4531_COMPATIBLE("microchip,mcp4552-104", MCP455x_104), + MCP4531_COMPATIBLE("microchip,mcp4561-502", MCP456x_502), + MCP4531_COMPATIBLE("microchip,mcp4561-103", MCP456x_103), + MCP4531_COMPATIBLE("microchip,mcp4561-503", MCP456x_503), + MCP4531_COMPATIBLE("microchip,mcp4561-104", MCP456x_104), + MCP4531_COMPATIBLE("microchip,mcp4562-502", MCP456x_502), + MCP4531_COMPATIBLE("microchip,mcp4562-103", MCP456x_103), + MCP4531_COMPATIBLE("microchip,mcp4562-503", MCP456x_503), + MCP4531_COMPATIBLE("microchip,mcp4562-104", MCP456x_104), + MCP4531_COMPATIBLE("microchip,mcp4631-502", MCP463x_502), + MCP4531_COMPATIBLE("microchip,mcp4631-103", MCP463x_103), + MCP4531_COMPATIBLE("microchip,mcp4631-503", MCP463x_503), + MCP4531_COMPATIBLE("microchip,mcp4631-104", MCP463x_104), + MCP4531_COMPATIBLE("microchip,mcp4632-502", MCP463x_502), + MCP4531_COMPATIBLE("microchip,mcp4632-103", MCP463x_103), + MCP4531_COMPATIBLE("microchip,mcp4632-503", MCP463x_503), + MCP4531_COMPATIBLE("microchip,mcp4632-104", MCP463x_104), + MCP4531_COMPATIBLE("microchip,mcp4641-502", MCP464x_502), + MCP4531_COMPATIBLE("microchip,mcp4641-103", MCP464x_103), + MCP4531_COMPATIBLE("microchip,mcp4641-503", MCP464x_503), + MCP4531_COMPATIBLE("microchip,mcp4641-104", MCP464x_104), + MCP4531_COMPATIBLE("microchip,mcp4642-502", MCP464x_502), + MCP4531_COMPATIBLE("microchip,mcp4642-103", MCP464x_103), + MCP4531_COMPATIBLE("microchip,mcp4642-503", MCP464x_503), + MCP4531_COMPATIBLE("microchip,mcp4642-104", MCP464x_104), + 
MCP4531_COMPATIBLE("microchip,mcp4651-502", MCP465x_502), + MCP4531_COMPATIBLE("microchip,mcp4651-103", MCP465x_103), + MCP4531_COMPATIBLE("microchip,mcp4651-503", MCP465x_503), + MCP4531_COMPATIBLE("microchip,mcp4651-104", MCP465x_104), + MCP4531_COMPATIBLE("microchip,mcp4652-502", MCP465x_502), + MCP4531_COMPATIBLE("microchip,mcp4652-103", MCP465x_103), + MCP4531_COMPATIBLE("microchip,mcp4652-503", MCP465x_503), + MCP4531_COMPATIBLE("microchip,mcp4652-104", MCP465x_104), + MCP4531_COMPATIBLE("microchip,mcp4661-502", MCP466x_502), + MCP4531_COMPATIBLE("microchip,mcp4661-103", MCP466x_103), + MCP4531_COMPATIBLE("microchip,mcp4661-503", MCP466x_503), + MCP4531_COMPATIBLE("microchip,mcp4661-104", MCP466x_104), + MCP4531_COMPATIBLE("microchip,mcp4662-502", MCP466x_502), + MCP4531_COMPATIBLE("microchip,mcp4662-103", MCP466x_103), + MCP4531_COMPATIBLE("microchip,mcp4662-503", MCP466x_503), + MCP4531_COMPATIBLE("microchip,mcp4662-104", MCP466x_104), + { /* sentinel */ } +}; +#endif + static int mcp4531_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct device *dev = &client->dev; struct mcp4531_data *data; struct iio_dev *indio_dev; + const struct of_device_id *match; if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA)) { @@ -167,7 +286,12 @@ static int mcp4531_probe(struct i2c_client *client, data = iio_priv(indio_dev); i2c_set_clientdata(client, indio_dev); data->client = client; - data->cfg = &mcp4531_cfg[id->driver_data]; + + match = of_match_device(of_match_ptr(mcp4531_of_match), dev); + if (match) + data->cfg = of_device_get_match_data(dev); + else + data->cfg = &mcp4531_cfg[id->driver_data]; indio_dev->dev.parent = dev; indio_dev->info = &mcp4531_info; @@ -187,6 +311,14 @@ static const struct i2c_device_id mcp4531_id[] = { { "mcp4532-103", MCP453x_103 }, { "mcp4532-503", MCP453x_503 }, { "mcp4532-104", MCP453x_104 }, + { "mcp4541-502", MCP454x_502 }, + { "mcp4541-103", MCP454x_103 }, + { "mcp4541-503", MCP454x_503 }, + { "mcp4541-104", MCP454x_104 }, + { "mcp4542-502", MCP454x_502 }, + { "mcp4542-103", MCP454x_103 }, + { "mcp4542-503", MCP454x_503 }, + { "mcp4542-104", MCP454x_104 }, { "mcp4551-502", MCP455x_502 }, { "mcp4551-103", MCP455x_103 }, { "mcp4551-503", MCP455x_503 }, @@ -195,6 +327,14 @@ static const struct i2c_device_id mcp4531_id[] = { { "mcp4552-103", MCP455x_103 }, { "mcp4552-503", MCP455x_503 }, { "mcp4552-104", MCP455x_104 }, + { "mcp4561-502", MCP456x_502 }, + { "mcp4561-103", MCP456x_103 }, + { "mcp4561-503", MCP456x_503 }, + { "mcp4561-104", MCP456x_104 }, + { "mcp4562-502", MCP456x_502 }, + { "mcp4562-103", MCP456x_103 }, + { "mcp4562-503", MCP456x_503 }, + { "mcp4562-104", MCP456x_104 }, { "mcp4631-502", MCP463x_502 }, { "mcp4631-103", MCP463x_103 }, { "mcp4631-503", MCP463x_503 }, @@ -203,6 +343,14 @@ static const struct i2c_device_id mcp4531_id[] = { { "mcp4632-103", MCP463x_103 }, { "mcp4632-503", MCP463x_503 }, { "mcp4632-104", MCP463x_104 }, + { "mcp4641-502", MCP464x_502 }, + { "mcp4641-103", MCP464x_103 }, + { "mcp4641-503", MCP464x_503 }, + { "mcp4641-104", MCP464x_104 }, + { "mcp4642-502", MCP464x_502 }, + { "mcp4642-103", MCP464x_103 }, + { "mcp4642-503", MCP464x_503 }, + { "mcp4642-104", MCP464x_104 }, { "mcp4651-502", MCP465x_502 }, { "mcp4651-103", MCP465x_103 }, { "mcp4651-503", MCP465x_503 }, @@ -211,6 +359,14 @@ static const struct i2c_device_id mcp4531_id[] = { { "mcp4652-103", MCP465x_103 }, { "mcp4652-503", MCP465x_503 }, { "mcp4652-104", MCP465x_104 }, + { "mcp4661-502", MCP466x_502 }, + { "mcp4661-103", 
MCP466x_103 },
+	{ "mcp4661-503", MCP466x_503 },
+	{ "mcp4661-104", MCP466x_104 },
+	{ "mcp4662-502", MCP466x_502 },
+	{ "mcp4662-103", MCP466x_103 },
+	{ "mcp4662-503", MCP466x_503 },
+	{ "mcp4662-104", MCP466x_104 },
 	{}
 };
 MODULE_DEVICE_TABLE(i2c, mcp4531_id);
@@ -218,6 +374,7 @@ MODULE_DEVICE_TABLE(i2c, mcp4531_id);
 
 static struct i2c_driver mcp4531_driver = {
 	.driver = {
 		.name = "mcp4531",
+		.of_match_table = of_match_ptr(mcp4531_of_match),
 	},
 	.probe = mcp4531_probe,
 	.id_table = mcp4531_id,
diff --git a/drivers/iio/potentiometer/tpl0102.c b/drivers/iio/potentiometer/tpl0102.c
index 5c304d42d713..7b6b54531ea2 100644
--- a/drivers/iio/potentiometer/tpl0102.c
+++ b/drivers/iio/potentiometer/tpl0102.c
@@ -116,10 +116,6 @@ static int tpl0102_probe(struct i2c_client *client,
 	struct tpl0102_data *data;
 	struct iio_dev *indio_dev;
 
-	if (!i2c_check_functionality(client->adapter,
-				     I2C_FUNC_SMBUS_WORD_DATA))
-		return -EOPNOTSUPP;
-
 	indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
 	if (!indio_dev)
 		return -ENOMEM;
diff --git a/drivers/iio/pressure/Kconfig b/drivers/iio/pressure/Kconfig
index cda9f128f3a4..d130cdc78f43 100644
--- a/drivers/iio/pressure/Kconfig
+++ b/drivers/iio/pressure/Kconfig
@@ -6,16 +6,33 @@ menu "Pressure sensors"
 
 config BMP280
-	tristate "Bosch Sensortec BMP180 and BMP280 pressure sensor driver"
-	depends on I2C
+	tristate "Bosch Sensortec BMP180/BMP280 pressure sensor I2C driver"
+	depends on (I2C || SPI_MASTER)
 	depends on !(BMP085_I2C=y || BMP085_I2C=m)
-	select REGMAP_I2C
+	depends on !(BMP085_SPI=y || BMP085_SPI=m)
+	select REGMAP
+	select BMP280_I2C if (I2C)
+	select BMP280_SPI if (SPI_MASTER)
 	help
 	  Say yes here to build support for Bosch Sensortec BMP180 and BMP280
-	  pressure and temperature sensors.
+	  pressure and temperature sensors. Also supports the BME280 with
+	  an additional humidity sensor channel.
 
-	  To compile this driver as a module, choose M here: the module
-	  will be called bmp280.
+	  To compile this driver as a module, choose M here: the core module
+	  will be called bmp280 and you will also get bmp280-i2c for I2C
+	  and/or bmp280-spi for SPI support.
+
+config BMP280_I2C
+	tristate
+	depends on BMP280
+	depends on I2C
+	select REGMAP_I2C
+
+config BMP280_SPI
+	tristate
+	depends on BMP280
+	depends on SPI_MASTER
+	select REGMAP
 
 config HID_SENSOR_PRESS
 	depends on HID_SENSOR_HUB
@@ -130,7 +147,7 @@ config IIO_ST_PRESS
 	select IIO_TRIGGERED_BUFFER if (IIO_BUFFER)
 	help
 	  Say yes here to build support for STMicroelectronics pressure
-	  sensors: LPS001WP, LPS25H, LPS331AP.
+	  sensors: LPS001WP, LPS25H, LPS331AP, LPS22HB.
 
 	  This driver can also be built as a module.
If so, these modules will be created: diff --git a/drivers/iio/pressure/Makefile b/drivers/iio/pressure/Makefile index 17d6e7afa1ff..7f395bed5e88 100644 --- a/drivers/iio/pressure/Makefile +++ b/drivers/iio/pressure/Makefile @@ -4,6 +4,9 @@ # When adding new entries keep the list in alphabetical order obj-$(CONFIG_BMP280) += bmp280.o +bmp280-objs := bmp280-core.o bmp280-regmap.o +obj-$(CONFIG_BMP280_I2C) += bmp280-i2c.o +obj-$(CONFIG_BMP280_SPI) += bmp280-spi.o obj-$(CONFIG_HID_SENSOR_PRESS) += hid-sensor-press.o obj-$(CONFIG_HP03) += hp03.o obj-$(CONFIG_MPL115) += mpl115.o diff --git a/drivers/iio/pressure/bmp280.c b/drivers/iio/pressure/bmp280-core.c similarity index 58% rename from drivers/iio/pressure/bmp280.c rename to drivers/iio/pressure/bmp280-core.c index 724452d61846..6943688e66df 100644 --- a/drivers/iio/pressure/bmp280.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -1,5 +1,9 @@ /* + * Copyright (c) 2010 Christoph Mair + * Copyright (c) 2012 Bosch Sensortec GmbH + * Copyright (c) 2012 Unixphere AB * Copyright (c) 2014 Intel Corporation + * Copyright (c) 2016 Linus Walleij * * Driver for Bosch Sensortec BMP180 and BMP280 digital pressure sensor. * @@ -10,99 +14,63 @@ * Datasheet: * https://ae-bst.resource.bosch.com/media/_tech/media/datasheets/BST-BMP180-DS000-121.pdf * https://ae-bst.resource.bosch.com/media/_tech/media/datasheets/BST-BMP280-DS001-12.pdf + * https://ae-bst.resource.bosch.com/media/_tech/media/datasheets/BST-BME280_DS001-11.pdf */ #define pr_fmt(fmt) "bmp280: " fmt +#include #include -#include -#include #include #include #include #include +#include +#include +#include +#include /* For irq_get_irq_data() */ +#include +#include +#include -/* BMP280 specific registers */ -#define BMP280_REG_TEMP_XLSB 0xFC -#define BMP280_REG_TEMP_LSB 0xFB -#define BMP280_REG_TEMP_MSB 0xFA -#define BMP280_REG_PRESS_XLSB 0xF9 -#define BMP280_REG_PRESS_LSB 0xF8 -#define BMP280_REG_PRESS_MSB 0xF7 - -#define BMP280_REG_CONFIG 0xF5 -#define BMP280_REG_STATUS 0xF3 - -#define BMP280_REG_COMP_TEMP_START 0x88 -#define BMP280_COMP_TEMP_REG_COUNT 6 - -#define BMP280_REG_COMP_PRESS_START 0x8E -#define BMP280_COMP_PRESS_REG_COUNT 18 - -#define BMP280_FILTER_MASK (BIT(4) | BIT(3) | BIT(2)) -#define BMP280_FILTER_OFF 0 -#define BMP280_FILTER_2X BIT(2) -#define BMP280_FILTER_4X BIT(3) -#define BMP280_FILTER_8X (BIT(3) | BIT(2)) -#define BMP280_FILTER_16X BIT(4) - -#define BMP280_OSRS_TEMP_MASK (BIT(7) | BIT(6) | BIT(5)) -#define BMP280_OSRS_TEMP_SKIP 0 -#define BMP280_OSRS_TEMP_X(osrs_t) ((osrs_t) << 5) -#define BMP280_OSRS_TEMP_1X BMP280_OSRS_TEMP_X(1) -#define BMP280_OSRS_TEMP_2X BMP280_OSRS_TEMP_X(2) -#define BMP280_OSRS_TEMP_4X BMP280_OSRS_TEMP_X(3) -#define BMP280_OSRS_TEMP_8X BMP280_OSRS_TEMP_X(4) -#define BMP280_OSRS_TEMP_16X BMP280_OSRS_TEMP_X(5) - -#define BMP280_OSRS_PRESS_MASK (BIT(4) | BIT(3) | BIT(2)) -#define BMP280_OSRS_PRESS_SKIP 0 -#define BMP280_OSRS_PRESS_X(osrs_p) ((osrs_p) << 2) -#define BMP280_OSRS_PRESS_1X BMP280_OSRS_PRESS_X(1) -#define BMP280_OSRS_PRESS_2X BMP280_OSRS_PRESS_X(2) -#define BMP280_OSRS_PRESS_4X BMP280_OSRS_PRESS_X(3) -#define BMP280_OSRS_PRESS_8X BMP280_OSRS_PRESS_X(4) -#define BMP280_OSRS_PRESS_16X BMP280_OSRS_PRESS_X(5) - -#define BMP280_MODE_MASK (BIT(1) | BIT(0)) -#define BMP280_MODE_SLEEP 0 -#define BMP280_MODE_FORCED BIT(0) -#define BMP280_MODE_NORMAL (BIT(1) | BIT(0)) - -/* BMP180 specific registers */ -#define BMP180_REG_OUT_XLSB 0xF8 -#define BMP180_REG_OUT_LSB 0xF7 -#define BMP180_REG_OUT_MSB 0xF6 - -#define BMP180_REG_CALIB_START 0xAA -#define 
BMP180_REG_CALIB_COUNT 22 - -#define BMP180_MEAS_SCO BIT(5) -#define BMP180_MEAS_TEMP (0x0E | BMP180_MEAS_SCO) -#define BMP180_MEAS_PRESS_X(oss) ((oss) << 6 | 0x14 | BMP180_MEAS_SCO) -#define BMP180_MEAS_PRESS_1X BMP180_MEAS_PRESS_X(0) -#define BMP180_MEAS_PRESS_2X BMP180_MEAS_PRESS_X(1) -#define BMP180_MEAS_PRESS_4X BMP180_MEAS_PRESS_X(2) -#define BMP180_MEAS_PRESS_8X BMP180_MEAS_PRESS_X(3) - -/* BMP180 and BMP280 common registers */ -#define BMP280_REG_CTRL_MEAS 0xF4 -#define BMP280_REG_RESET 0xE0 -#define BMP280_REG_ID 0xD0 - -#define BMP180_CHIP_ID 0x55 -#define BMP280_CHIP_ID 0x58 -#define BMP280_SOFT_RESET_VAL 0xB6 +#include "bmp280.h" + +/* + * These enums are used for indexing into the array of calibration + * coefficients for BMP180. + */ +enum { AC1, AC2, AC3, AC4, AC5, AC6, B1, B2, MB, MC, MD }; + +struct bmp180_calib { + s16 AC1; + s16 AC2; + s16 AC3; + u16 AC4; + u16 AC5; + u16 AC6; + s16 B1; + s16 B2; + s16 MB; + s16 MC; + s16 MD; +}; struct bmp280_data { - struct i2c_client *client; + struct device *dev; struct mutex lock; struct regmap *regmap; + struct completion done; + bool use_eoc; const struct bmp280_chip_info *chip_info; + struct bmp180_calib calib; + struct regulator *vddd; + struct regulator *vdda; + unsigned int start_up_time; /* in milliseconds */ /* log of base 2 of oversampling rate */ u8 oversampling_press; u8 oversampling_temp; + u8 oversampling_humid; /* * Carryover value from temperature conversion, used in pressure @@ -112,17 +80,19 @@ struct bmp280_data { }; struct bmp280_chip_info { - const struct regmap_config *regmap_config; - const int *oversampling_temp_avail; int num_oversampling_temp_avail; const int *oversampling_press_avail; int num_oversampling_press_avail; + const int *oversampling_humid_avail; + int num_oversampling_humid_avail; + int (*chip_config)(struct bmp280_data *); int (*read_temp)(struct bmp280_data *, int *); int (*read_press)(struct bmp280_data *, int *, int *); + int (*read_humid)(struct bmp280_data *, int *, int *); }; /* @@ -143,45 +113,75 @@ static const struct iio_chan_spec bmp280_channels[] = { .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED) | BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), }, + { + .type = IIO_HUMIDITYRELATIVE, + .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED) | + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), + }, }; -static bool bmp280_is_writeable_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case BMP280_REG_CONFIG: - case BMP280_REG_CTRL_MEAS: - case BMP280_REG_RESET: - return true; - default: - return false; - }; -} +/* + * Returns humidity in percent, resolution is 0.01 percent. Output value of + * "47445" represents 47445/1024 = 46.333 %RH. + * + * Taken from BME280 datasheet, Section 4.2.3, "Compensation formula". 
+ */ -static bool bmp280_is_volatile_reg(struct device *dev, unsigned int reg) +static u32 bmp280_compensate_humidity(struct bmp280_data *data, + s32 adc_humidity) { - switch (reg) { - case BMP280_REG_TEMP_XLSB: - case BMP280_REG_TEMP_LSB: - case BMP280_REG_TEMP_MSB: - case BMP280_REG_PRESS_XLSB: - case BMP280_REG_PRESS_LSB: - case BMP280_REG_PRESS_MSB: - case BMP280_REG_STATUS: - return true; - default: - return false; + struct device *dev = data->dev; + unsigned int H1, H3, tmp; + int H2, H4, H5, H6, ret, var; + + ret = regmap_read(data->regmap, BMP280_REG_COMP_H1, &H1); + if (ret < 0) { + dev_err(dev, "failed to read H1 comp value\n"); + return ret; } -} -static const struct regmap_config bmp280_regmap_config = { - .reg_bits = 8, - .val_bits = 8, + ret = regmap_bulk_read(data->regmap, BMP280_REG_COMP_H2, &tmp, 2); + if (ret < 0) { + dev_err(dev, "failed to read H2 comp value\n"); + return ret; + } + H2 = sign_extend32(le16_to_cpu(tmp), 15); + + ret = regmap_read(data->regmap, BMP280_REG_COMP_H3, &H3); + if (ret < 0) { + dev_err(dev, "failed to read H3 comp value\n"); + return ret; + } + + ret = regmap_bulk_read(data->regmap, BMP280_REG_COMP_H4, &tmp, 2); + if (ret < 0) { + dev_err(dev, "failed to read H4 comp value\n"); + return ret; + } + H4 = sign_extend32(((be16_to_cpu(tmp) >> 4) & 0xff0) | + (be16_to_cpu(tmp) & 0xf), 11); + + ret = regmap_bulk_read(data->regmap, BMP280_REG_COMP_H5, &tmp, 2); + if (ret < 0) { + dev_err(dev, "failed to read H5 comp value\n"); + return ret; + } + H5 = sign_extend32(((le16_to_cpu(tmp) >> 4) & 0xfff), 11); + + ret = regmap_read(data->regmap, BMP280_REG_COMP_H6, &tmp); + if (ret < 0) { + dev_err(dev, "failed to read H6 comp value\n"); + return ret; + } + H6 = sign_extend32(tmp, 7); - .max_register = BMP280_REG_TEMP_XLSB, - .cache_type = REGCACHE_RBTREE, + var = ((s32)data->t_fine) - 76800; + var = ((((adc_humidity << 14) - (H4 << 20) - (H5 * var)) + 16384) >> 15) + * (((((((var * H6) >> 10) * (((var * H3) >> 11) + 32768)) >> 10) + + 2097152) * H2 + 8192) >> 14); + var -= ((((var >> 15) * (var >> 15)) >> 7) * H1) >> 4; - .writeable_reg = bmp280_is_writeable_reg, - .volatile_reg = bmp280_is_volatile_reg, + return var >> 12; }; /* @@ -201,7 +201,7 @@ static s32 bmp280_compensate_temp(struct bmp280_data *data, ret = regmap_bulk_read(data->regmap, BMP280_REG_COMP_TEMP_START, buf, BMP280_COMP_TEMP_REG_COUNT); if (ret < 0) { - dev_err(&data->client->dev, + dev_err(data->dev, "failed to read temperature calibration parameters\n"); return ret; } @@ -241,7 +241,7 @@ static u32 bmp280_compensate_press(struct bmp280_data *data, ret = regmap_bulk_read(data->regmap, BMP280_REG_COMP_PRESS_START, buf, BMP280_COMP_PRESS_REG_COUNT); if (ret < 0) { - dev_err(&data->client->dev, + dev_err(data->dev, "failed to read pressure calibration parameters\n"); return ret; } @@ -276,7 +276,7 @@ static int bmp280_read_temp(struct bmp280_data *data, ret = regmap_bulk_read(data->regmap, BMP280_REG_TEMP_MSB, (u8 *) &tmp, 3); if (ret < 0) { - dev_err(&data->client->dev, "failed to read temperature\n"); + dev_err(data->dev, "failed to read temperature\n"); return ret; } @@ -311,7 +311,7 @@ static int bmp280_read_press(struct bmp280_data *data, ret = regmap_bulk_read(data->regmap, BMP280_REG_PRESS_MSB, (u8 *) &tmp, 3); if (ret < 0) { - dev_err(&data->client->dev, "failed to read pressure\n"); + dev_err(data->dev, "failed to read pressure\n"); return ret; } @@ -324,6 +324,34 @@ static int bmp280_read_press(struct bmp280_data *data, return IIO_VAL_FRACTIONAL; } +static int 
bmp280_read_humid(struct bmp280_data *data, int *val, int *val2) +{ + int ret; + __be16 tmp = 0; + s32 adc_humidity; + u32 comp_humidity; + + /* Read and compensate temperature so we get a reading of t_fine. */ + ret = bmp280_read_temp(data, NULL); + if (ret < 0) + return ret; + + ret = regmap_bulk_read(data->regmap, BMP280_REG_HUMIDITY_MSB, + (u8 *) &tmp, 2); + if (ret < 0) { + dev_err(data->dev, "failed to read humidity\n"); + return ret; + } + + adc_humidity = be16_to_cpu(tmp); + comp_humidity = bmp280_compensate_humidity(data, adc_humidity); + + *val = comp_humidity; + *val2 = 1024; + + return IIO_VAL_FRACTIONAL; +} + static int bmp280_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long mask) @@ -331,11 +359,15 @@ static int bmp280_read_raw(struct iio_dev *indio_dev, int ret; struct bmp280_data *data = iio_priv(indio_dev); + pm_runtime_get_sync(data->dev); mutex_lock(&data->lock); switch (mask) { case IIO_CHAN_INFO_PROCESSED: switch (chan->type) { + case IIO_HUMIDITYRELATIVE: + ret = data->chip_info->read_humid(data, val, val2); + break; case IIO_PRESSURE: ret = data->chip_info->read_press(data, val, val2); break; @@ -349,6 +381,10 @@ static int bmp280_read_raw(struct iio_dev *indio_dev, break; case IIO_CHAN_INFO_OVERSAMPLING_RATIO: switch (chan->type) { + case IIO_HUMIDITYRELATIVE: + *val = 1 << data->oversampling_humid; + ret = IIO_VAL_INT; + break; case IIO_PRESSURE: *val = 1 << data->oversampling_press; ret = IIO_VAL_INT; @@ -368,10 +404,29 @@ static int bmp280_read_raw(struct iio_dev *indio_dev, } mutex_unlock(&data->lock); + pm_runtime_mark_last_busy(data->dev); + pm_runtime_put_autosuspend(data->dev); return ret; } +static int bmp280_write_oversampling_ratio_humid(struct bmp280_data *data, + int val) +{ + int i; + const int *avail = data->chip_info->oversampling_humid_avail; + const int n = data->chip_info->num_oversampling_humid_avail; + + for (i = 0; i < n; i++) { + if (avail[i] == val) { + data->oversampling_humid = ilog2(val); + + return data->chip_info->chip_config(data); + } + } + return -EINVAL; +} + static int bmp280_write_oversampling_ratio_temp(struct bmp280_data *data, int val) { @@ -415,8 +470,12 @@ static int bmp280_write_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + pm_runtime_get_sync(data->dev); mutex_lock(&data->lock); switch (chan->type) { + case IIO_HUMIDITYRELATIVE: + ret = bmp280_write_oversampling_ratio_humid(data, val); + break; case IIO_PRESSURE: ret = bmp280_write_oversampling_ratio_press(data, val); break; @@ -428,6 +487,8 @@ static int bmp280_write_raw(struct iio_dev *indio_dev, break; } mutex_unlock(&data->lock); + pm_runtime_mark_last_busy(data->dev); + pm_runtime_put_autosuspend(data->dev); break; default: return -EINVAL; @@ -502,7 +563,7 @@ static int bmp280_chip_config(struct bmp280_data *data) BMP280_MODE_MASK, osrs | BMP280_MODE_NORMAL); if (ret < 0) { - dev_err(&data->client->dev, + dev_err(data->dev, "failed to write ctrl_meas register\n"); return ret; } @@ -511,7 +572,7 @@ static int bmp280_chip_config(struct bmp280_data *data) BMP280_FILTER_MASK, BMP280_FILTER_4X); if (ret < 0) { - dev_err(&data->client->dev, + dev_err(data->dev, "failed to write config register\n"); return ret; } @@ -522,8 +583,6 @@ static int bmp280_chip_config(struct bmp280_data *data) static const int bmp280_oversampling_avail[] = { 1, 2, 4, 8, 16 }; static const struct bmp280_chip_info bmp280_chip_info = { - .regmap_config = &bmp280_regmap_config, - .oversampling_temp_avail = 
bmp280_oversampling_avail, .num_oversampling_temp_avail = ARRAY_SIZE(bmp280_oversampling_avail), @@ -535,39 +594,32 @@ static const struct bmp280_chip_info bmp280_chip_info = { .read_press = bmp280_read_press, }; -static bool bmp180_is_writeable_reg(struct device *dev, unsigned int reg) +static int bme280_chip_config(struct bmp280_data *data) { - switch (reg) { - case BMP280_REG_CTRL_MEAS: - case BMP280_REG_RESET: - return true; - default: - return false; - }; -} + int ret = bmp280_chip_config(data); + u8 osrs = BMP280_OSRS_HUMIDITIY_X(data->oversampling_humid + 1); -static bool bmp180_is_volatile_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case BMP180_REG_OUT_XLSB: - case BMP180_REG_OUT_LSB: - case BMP180_REG_OUT_MSB: - case BMP280_REG_CTRL_MEAS: - return true; - default: - return false; - } + if (ret < 0) + return ret; + + return regmap_update_bits(data->regmap, BMP280_REG_CTRL_HUMIDITY, + BMP280_OSRS_HUMIDITY_MASK, osrs); } -static const struct regmap_config bmp180_regmap_config = { - .reg_bits = 8, - .val_bits = 8, +static const struct bmp280_chip_info bme280_chip_info = { + .oversampling_temp_avail = bmp280_oversampling_avail, + .num_oversampling_temp_avail = ARRAY_SIZE(bmp280_oversampling_avail), + + .oversampling_press_avail = bmp280_oversampling_avail, + .num_oversampling_press_avail = ARRAY_SIZE(bmp280_oversampling_avail), - .max_register = BMP180_REG_OUT_XLSB, - .cache_type = REGCACHE_RBTREE, + .oversampling_humid_avail = bmp280_oversampling_avail, + .num_oversampling_humid_avail = ARRAY_SIZE(bmp280_oversampling_avail), - .writeable_reg = bmp180_is_writeable_reg, - .volatile_reg = bmp180_is_volatile_reg, + .chip_config = bme280_chip_config, + .read_temp = bmp280_read_temp, + .read_press = bmp280_read_press, + .read_humid = bmp280_read_humid, }; static int bmp180_measure(struct bmp280_data *data, u8 ctrl_meas) @@ -577,16 +629,32 @@ static int bmp180_measure(struct bmp280_data *data, u8 ctrl_meas) unsigned int delay_us; unsigned int ctrl; + if (data->use_eoc) + init_completion(&data->done); + ret = regmap_write(data->regmap, BMP280_REG_CTRL_MEAS, ctrl_meas); if (ret) return ret; - if (ctrl_meas == BMP180_MEAS_TEMP) - delay_us = 4500; - else - delay_us = conversion_time_max[data->oversampling_press]; - - usleep_range(delay_us, delay_us + 1000); + if (data->use_eoc) { + /* + * If we have a completion interrupt, use it, wait up to + * 100ms. The longest conversion time listed is 76.5 ms for + * advanced resolution mode. + */ + ret = wait_for_completion_timeout(&data->done, + 1 + msecs_to_jiffies(100)); + if (!ret) + dev_err(data->dev, "timeout waiting for completion\n"); + } else { + if (ctrl_meas == BMP180_MEAS_TEMP) + delay_us = 4500; + else + delay_us = + conversion_time_max[data->oversampling_press]; + + usleep_range(delay_us, delay_us + 1000); + } ret = regmap_read(data->regmap, BMP280_REG_CTRL_MEAS, &ctrl); if (ret) @@ -617,26 +685,6 @@ static int bmp180_read_adc_temp(struct bmp280_data *data, int *val) return 0; } -/* - * These enums are used for indexing into the array of calibration - * coefficients for BMP180. 
- */ -enum { AC1, AC2, AC3, AC4, AC5, AC6, B1, B2, MB, MC, MD }; - -struct bmp180_calib { - s16 AC1; - s16 AC2; - s16 AC3; - u16 AC4; - u16 AC5; - u16 AC6; - s16 B1; - s16 B2; - s16 MB; - s16 MC; - s16 MD; -}; - static int bmp180_read_calib(struct bmp280_data *data, struct bmp180_calib *calib) { @@ -656,6 +704,9 @@ static int bmp180_read_calib(struct bmp280_data *data, return -EIO; } + /* Toss the calibration data into the entropy pool */ + add_device_randomness(buf, sizeof(buf)); + calib->AC1 = be16_to_cpu(buf[AC1]); calib->AC2 = be16_to_cpu(buf[AC2]); calib->AC3 = be16_to_cpu(buf[AC3]); @@ -679,19 +730,11 @@ static int bmp180_read_calib(struct bmp280_data *data, */ static s32 bmp180_compensate_temp(struct bmp280_data *data, s32 adc_temp) { - int ret; s32 x1, x2; - struct bmp180_calib calib; + struct bmp180_calib *calib = &data->calib; - ret = bmp180_read_calib(data, &calib); - if (ret < 0) { - dev_err(&data->client->dev, - "failed to read calibration coefficients\n"); - return ret; - } - - x1 = ((adc_temp - calib.AC6) * calib.AC5) >> 15; - x2 = (calib.MC << 11) / (x1 + calib.MD); + x1 = ((adc_temp - calib->AC6) * calib->AC5) >> 15; + x2 = (calib->MC << 11) / (x1 + calib->MD); data->t_fine = x1 + x2; return (data->t_fine + 8) >> 4; @@ -746,29 +789,21 @@ static int bmp180_read_adc_press(struct bmp280_data *data, int *val) */ static u32 bmp180_compensate_press(struct bmp280_data *data, s32 adc_press) { - int ret; s32 x1, x2, x3, p; s32 b3, b6; u32 b4, b7; s32 oss = data->oversampling_press; - struct bmp180_calib calib; - - ret = bmp180_read_calib(data, &calib); - if (ret < 0) { - dev_err(&data->client->dev, - "failed to read calibration coefficients\n"); - return ret; - } + struct bmp180_calib *calib = &data->calib; b6 = data->t_fine - 4000; - x1 = (calib.B2 * (b6 * b6 >> 12)) >> 11; - x2 = calib.AC2 * b6 >> 11; + x1 = (calib->B2 * (b6 * b6 >> 12)) >> 11; + x2 = calib->AC2 * b6 >> 11; x3 = x1 + x2; - b3 = ((((s32)calib.AC1 * 4 + x3) << oss) + 2) / 4; - x1 = calib.AC3 * b6 >> 13; - x2 = (calib.B1 * ((b6 * b6) >> 12)) >> 16; + b3 = ((((s32)calib->AC1 * 4 + x3) << oss) + 2) / 4; + x1 = calib->AC3 * b6 >> 13; + x2 = (calib->B1 * ((b6 * b6) >> 12)) >> 16; x3 = (x1 + x2 + 2) >> 2; - b4 = calib.AC4 * (u32)(x3 + 32768) >> 15; + b4 = calib->AC4 * (u32)(x3 + 32768) >> 15; b7 = ((u32)adc_press - b3) * (50000 >> oss); if (b7 < 0x80000000) p = (b7 * 2) / b4; @@ -815,8 +850,6 @@ static const int bmp180_oversampling_temp_avail[] = { 1 }; static const int bmp180_oversampling_press_avail[] = { 1, 2, 4, 8 }; static const struct bmp280_chip_info bmp180_chip_info = { - .regmap_config = &bmp180_regmap_config, - .oversampling_temp_avail = bmp180_oversampling_temp_avail, .num_oversampling_temp_avail = ARRAY_SIZE(bmp180_oversampling_temp_avail), @@ -830,92 +863,254 @@ static const struct bmp280_chip_info bmp180_chip_info = { .read_press = bmp180_read_press, }; -static int bmp280_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static irqreturn_t bmp085_eoc_irq(int irq, void *d) +{ + struct bmp280_data *data = d; + + complete(&data->done); + + return IRQ_HANDLED; +} + +static int bmp085_fetch_eoc_irq(struct device *dev, + const char *name, + int irq, + struct bmp280_data *data) +{ + unsigned long irq_trig; + int ret; + + irq_trig = irqd_get_trigger_type(irq_get_irq_data(irq)); + if (irq_trig != IRQF_TRIGGER_RISING) { + dev_err(dev, "non-rising trigger given for EOC interrupt, " + "trying to enforce it\n"); + irq_trig = IRQF_TRIGGER_RISING; + } + ret = devm_request_threaded_irq(dev, + irq, + 
bmp085_eoc_irq, + NULL, + irq_trig, + name, + data); + if (ret) { + /* Bail out without IRQ but keep the driver in place */ + dev_err(dev, "unable to request DRDY IRQ\n"); + return 0; + } + + data->use_eoc = true; + return 0; +} + +int bmp280_common_probe(struct device *dev, + struct regmap *regmap, + unsigned int chip, + const char *name, + int irq) { int ret; struct iio_dev *indio_dev; struct bmp280_data *data; unsigned int chip_id; + struct gpio_desc *gpiod; - indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data)); + indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); if (!indio_dev) return -ENOMEM; data = iio_priv(indio_dev); mutex_init(&data->lock); - data->client = client; + data->dev = dev; - indio_dev->dev.parent = &client->dev; - indio_dev->name = id->name; + indio_dev->dev.parent = dev; + indio_dev->name = name; indio_dev->channels = bmp280_channels; - indio_dev->num_channels = ARRAY_SIZE(bmp280_channels); indio_dev->info = &bmp280_info; indio_dev->modes = INDIO_DIRECT_MODE; - switch (id->driver_data) { + switch (chip) { case BMP180_CHIP_ID: + indio_dev->num_channels = 2; data->chip_info = &bmp180_chip_info; data->oversampling_press = ilog2(8); data->oversampling_temp = ilog2(1); + data->start_up_time = 10; break; case BMP280_CHIP_ID: + indio_dev->num_channels = 2; data->chip_info = &bmp280_chip_info; data->oversampling_press = ilog2(16); data->oversampling_temp = ilog2(2); + data->start_up_time = 2; + break; + case BME280_CHIP_ID: + indio_dev->num_channels = 3; + data->chip_info = &bme280_chip_info; + data->oversampling_press = ilog2(16); + data->oversampling_humid = ilog2(16); + data->oversampling_temp = ilog2(2); + data->start_up_time = 2; break; default: return -EINVAL; } - data->regmap = devm_regmap_init_i2c(client, - data->chip_info->regmap_config); - if (IS_ERR(data->regmap)) { - dev_err(&client->dev, "failed to allocate register map\n"); - return PTR_ERR(data->regmap); + /* Bring up regulators */ + data->vddd = devm_regulator_get(dev, "vddd"); + if (IS_ERR(data->vddd)) { + dev_err(dev, "failed to get VDDD regulator\n"); + return PTR_ERR(data->vddd); + } + ret = regulator_enable(data->vddd); + if (ret) { + dev_err(dev, "failed to enable VDDD regulator\n"); + return ret; + } + data->vdda = devm_regulator_get(dev, "vdda"); + if (IS_ERR(data->vdda)) { + dev_err(dev, "failed to get VDDA regulator\n"); + ret = PTR_ERR(data->vddd); + goto out_disable_vddd; + } + ret = regulator_enable(data->vdda); + if (ret) { + dev_err(dev, "failed to enable VDDA regulator\n"); + goto out_disable_vddd; + } + /* Wait to make sure we started up properly */ + mdelay(data->start_up_time); + + /* Bring chip out of reset if there is an assigned GPIO line */ + gpiod = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); + /* Deassert the signal */ + if (!IS_ERR(gpiod)) { + dev_info(dev, "release reset\n"); + gpiod_set_value(gpiod, 0); } - ret = regmap_read(data->regmap, BMP280_REG_ID, &chip_id); + data->regmap = regmap; + ret = regmap_read(regmap, BMP280_REG_ID, &chip_id); if (ret < 0) - return ret; - if (chip_id != id->driver_data) { - dev_err(&client->dev, "bad chip id. 
expected %lx got %x\n", - id->driver_data, chip_id); - return -EINVAL; + goto out_disable_vdda; + if (chip_id != chip) { + dev_err(dev, "bad chip id: expected %x got %x\n", + chip, chip_id); + ret = -EINVAL; + goto out_disable_vdda; } ret = data->chip_info->chip_config(data); if (ret < 0) - return ret; + goto out_disable_vdda; + + dev_set_drvdata(dev, indio_dev); + + /* + * The BMP085 and BMP180 have their calibration data in an EEPROM; + * read it out once at probe time, as it will not change. + */ + if (chip_id == BMP180_CHIP_ID) { + ret = bmp180_read_calib(data, &data->calib); + if (ret < 0) { + dev_err(data->dev, + "failed to read calibration coefficients\n"); + goto out_disable_vdda; + } + } + + /* + * Attempt to grab an optional EOC IRQ - only the BMP085 has this. + * However, the BMP085 shares the chip ID of the BMP180, so we only + * look for the EOC IRQ when we have that chip ID and an IRQ was + * actually specified. + */ + if (irq > 0 && chip_id == BMP180_CHIP_ID) { + ret = bmp085_fetch_eoc_irq(dev, name, irq, data); + if (ret) + goto out_disable_vdda; + } + + /* Enable runtime PM */ + pm_runtime_get_noresume(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + /* + * Set autosuspend to two orders of magnitude larger than the + * start-up time. + */ + pm_runtime_set_autosuspend_delay(dev, data->start_up_time * 100); + pm_runtime_use_autosuspend(dev); + pm_runtime_put(dev); + + ret = iio_device_register(indio_dev); + if (ret) + goto out_runtime_pm_disable; + - return devm_iio_device_register(&client->dev, indio_dev); + return 0; + +out_runtime_pm_disable: + pm_runtime_get_sync(data->dev); + pm_runtime_put_noidle(data->dev); + pm_runtime_disable(data->dev); +out_disable_vdda: + regulator_disable(data->vdda); +out_disable_vddd: + regulator_disable(data->vddd); + return ret; } +EXPORT_SYMBOL(bmp280_common_probe); -static const struct acpi_device_id bmp280_acpi_match[] = { - {"BMP0280", BMP280_CHIP_ID }, - {"BMP0180", BMP180_CHIP_ID }, - {"BMP0085", BMP180_CHIP_ID }, - { }, -}; -MODULE_DEVICE_TABLE(acpi, bmp280_acpi_match); +int bmp280_common_remove(struct device *dev) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct bmp280_data *data = iio_priv(indio_dev); -static const struct i2c_device_id bmp280_id[] = { - {"bmp280", BMP280_CHIP_ID }, - {"bmp180", BMP180_CHIP_ID }, - {"bmp085", BMP180_CHIP_ID }, - { }, -}; -MODULE_DEVICE_TABLE(i2c, bmp280_id); + iio_device_unregister(indio_dev); + pm_runtime_get_sync(data->dev); + pm_runtime_put_noidle(data->dev); + pm_runtime_disable(data->dev); + regulator_disable(data->vdda); + regulator_disable(data->vddd); + return 0; +} +EXPORT_SYMBOL(bmp280_common_remove); -static struct i2c_driver bmp280_driver = { - .driver = { - .name = "bmp280", - .acpi_match_table = ACPI_PTR(bmp280_acpi_match), - }, - .probe = bmp280_probe, - .id_table = bmp280_id, +#ifdef CONFIG_PM +static int bmp280_runtime_suspend(struct device *dev) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct bmp280_data *data = iio_priv(indio_dev); + int ret; + + ret = regulator_disable(data->vdda); + if (ret) + return ret; + return regulator_disable(data->vddd); +} + +static int bmp280_runtime_resume(struct device *dev) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct bmp280_data *data = iio_priv(indio_dev); + int ret; + + ret = regulator_enable(data->vddd); + if (ret) + return ret; + ret = regulator_enable(data->vdda); + if (ret) + return ret; + msleep(data->start_up_time); + return data->chip_info->chip_config(data); +} +#endif /* CONFIG_PM */ + +const struct dev_pm_ops bmp280_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + 
SET_RUNTIME_PM_OPS(bmp280_runtime_suspend, + bmp280_runtime_resume, NULL) }; -module_i2c_driver(bmp280_driver); +EXPORT_SYMBOL(bmp280_dev_pm_ops); MODULE_AUTHOR("Vlad Dogaru "); MODULE_DESCRIPTION("Driver for Bosch Sensortec BMP180/BMP280 pressure and temperature sensor"); diff --git a/drivers/iio/pressure/bmp280-i2c.c b/drivers/iio/pressure/bmp280-i2c.c new file mode 100644 index 000000000000..03742b15b72a --- /dev/null +++ b/drivers/iio/pressure/bmp280-i2c.c @@ -0,0 +1,91 @@ +#include <linux/module.h> +#include <linux/i2c.h> +#include <linux/acpi.h> +#include <linux/of.h> +#include <linux/regmap.h> + +#include "bmp280.h" + +static int bmp280_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct regmap *regmap; + const struct regmap_config *regmap_config; + + switch (id->driver_data) { + case BMP180_CHIP_ID: + regmap_config = &bmp180_regmap_config; + break; + case BMP280_CHIP_ID: + case BME280_CHIP_ID: + regmap_config = &bmp280_regmap_config; + break; + default: + return -EINVAL; + } + + regmap = devm_regmap_init_i2c(client, regmap_config); + if (IS_ERR(regmap)) { + dev_err(&client->dev, "failed to allocate register map\n"); + return PTR_ERR(regmap); + } + + return bmp280_common_probe(&client->dev, + regmap, + id->driver_data, + id->name, + client->irq); +} + +static int bmp280_i2c_remove(struct i2c_client *client) +{ + return bmp280_common_remove(&client->dev); +} + +static const struct acpi_device_id bmp280_acpi_i2c_match[] = { + {"BMP0280", BMP280_CHIP_ID }, + {"BMP0180", BMP180_CHIP_ID }, + {"BMP0085", BMP180_CHIP_ID }, + {"BME0280", BME280_CHIP_ID }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, bmp280_acpi_i2c_match); + +#ifdef CONFIG_OF +static const struct of_device_id bmp280_of_i2c_match[] = { + { .compatible = "bosch,bme280", .data = (void *)BME280_CHIP_ID }, + { .compatible = "bosch,bmp280", .data = (void *)BMP280_CHIP_ID }, + { .compatible = "bosch,bmp180", .data = (void *)BMP180_CHIP_ID }, + { .compatible = "bosch,bmp085", .data = (void *)BMP180_CHIP_ID }, + { }, +}; +MODULE_DEVICE_TABLE(of, bmp280_of_i2c_match); +#else +#define bmp280_of_i2c_match NULL +#endif + +static const struct i2c_device_id bmp280_i2c_id[] = { + {"bmp280", BMP280_CHIP_ID }, + {"bmp180", BMP180_CHIP_ID }, + {"bmp085", BMP180_CHIP_ID }, + {"bme280", BME280_CHIP_ID }, + { }, +}; +MODULE_DEVICE_TABLE(i2c, bmp280_i2c_id); + +static struct i2c_driver bmp280_i2c_driver = { + .driver = { + .name = "bmp280", + .acpi_match_table = ACPI_PTR(bmp280_acpi_i2c_match), + .of_match_table = of_match_ptr(bmp280_of_i2c_match), + .pm = &bmp280_dev_pm_ops, + }, + .probe = bmp280_i2c_probe, + .remove = bmp280_i2c_remove, + .id_table = bmp280_i2c_id, +}; +module_i2c_driver(bmp280_i2c_driver); + +MODULE_AUTHOR("Vlad Dogaru "); +MODULE_DESCRIPTION("Driver for Bosch Sensortec BMP180/BMP280 pressure and temperature sensor"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/pressure/bmp280-regmap.c b/drivers/iio/pressure/bmp280-regmap.c new file mode 100644 index 000000000000..6807113ec09f --- /dev/null +++ b/drivers/iio/pressure/bmp280-regmap.c @@ -0,0 +1,84 @@ +#include <linux/device.h> +#include <linux/module.h> +#include <linux/regmap.h> + +#include "bmp280.h" + +static bool bmp180_is_writeable_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case BMP280_REG_CTRL_MEAS: + case BMP280_REG_RESET: + return true; + default: + return false; + } +} + +static bool bmp180_is_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case BMP180_REG_OUT_XLSB: + case BMP180_REG_OUT_LSB: + case BMP180_REG_OUT_MSB: + case BMP280_REG_CTRL_MEAS: + return true; + default: + return false; + } +} + +const struct 
regmap_config bmp180_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + + .max_register = BMP180_REG_OUT_XLSB, + .cache_type = REGCACHE_RBTREE, + + .writeable_reg = bmp180_is_writeable_reg, + .volatile_reg = bmp180_is_volatile_reg, +}; +EXPORT_SYMBOL(bmp180_regmap_config); + +static bool bmp280_is_writeable_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case BMP280_REG_CONFIG: + case BMP280_REG_CTRL_HUMIDITY: + case BMP280_REG_CTRL_MEAS: + case BMP280_REG_RESET: + return true; + default: + return false; + } +} + +static bool bmp280_is_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case BMP280_REG_HUMIDITY_LSB: + case BMP280_REG_HUMIDITY_MSB: + case BMP280_REG_TEMP_XLSB: + case BMP280_REG_TEMP_LSB: + case BMP280_REG_TEMP_MSB: + case BMP280_REG_PRESS_XLSB: + case BMP280_REG_PRESS_LSB: + case BMP280_REG_PRESS_MSB: + case BMP280_REG_STATUS: + return true; + default: + return false; + } +} + +const struct regmap_config bmp280_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + + .max_register = BMP280_REG_HUMIDITY_LSB, + .cache_type = REGCACHE_RBTREE, + + .writeable_reg = bmp280_is_writeable_reg, + .volatile_reg = bmp280_is_volatile_reg, +}; +EXPORT_SYMBOL(bmp280_regmap_config); diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c new file mode 100644 index 000000000000..17bc95586f9e --- /dev/null +++ b/drivers/iio/pressure/bmp280-spi.c @@ -0,0 +1,125 @@ +/* + * SPI interface for the BMP280 driver + * + * Inspired by the older BMP085 driver drivers/misc/bmp085-spi.c + */ +#include <linux/module.h> +#include <linux/spi/spi.h> +#include <linux/err.h> +#include <linux/regmap.h> + +#include "bmp280.h" + +static int bmp280_regmap_spi_write(void *context, const void *data, + size_t count) +{ + struct device *dev = context; + struct spi_device *spi = to_spi_device(dev); + u8 buf[2]; + + memcpy(buf, data, 2); + /* + * The SPI register address (= full register address without bit 7) and + * the write command (bit7 = RW = '0') + */ + buf[0] &= ~0x80; + + return spi_write_then_read(spi, buf, 2, NULL, 0); +} + +static int bmp280_regmap_spi_read(void *context, const void *reg, + size_t reg_size, void *val, size_t val_size) +{ + struct device *dev = context; + struct spi_device *spi = to_spi_device(dev); + + return spi_write_then_read(spi, reg, reg_size, val, val_size); } + +static struct regmap_bus bmp280_regmap_bus = { + .write = bmp280_regmap_spi_write, + .read = bmp280_regmap_spi_read, + .reg_format_endian_default = REGMAP_ENDIAN_BIG, + .val_format_endian_default = REGMAP_ENDIAN_BIG, +}; + +static int bmp280_spi_probe(struct spi_device *spi) +{ + const struct spi_device_id *id = spi_get_device_id(spi); + struct regmap *regmap; + const struct regmap_config *regmap_config; + int ret; + + spi->bits_per_word = 8; + ret = spi_setup(spi); + if (ret < 0) { + dev_err(&spi->dev, "spi_setup failed!\n"); + return ret; + } + + switch (id->driver_data) { + case BMP180_CHIP_ID: + regmap_config = &bmp180_regmap_config; + break; + case BMP280_CHIP_ID: + case BME280_CHIP_ID: + regmap_config = &bmp280_regmap_config; + break; + default: + return -EINVAL; + } + + regmap = devm_regmap_init(&spi->dev, + &bmp280_regmap_bus, + &spi->dev, + regmap_config); + if (IS_ERR(regmap)) { + dev_err(&spi->dev, "failed to allocate register map\n"); + return PTR_ERR(regmap); + } + + return bmp280_common_probe(&spi->dev, + regmap, + id->driver_data, + id->name, + spi->irq); + } + +static int bmp280_spi_remove(struct spi_device *spi) +{ + return bmp280_common_remove(&spi->dev); +} + +static const struct of_device_id 
bmp280_of_spi_match[] = { + { .compatible = "bosch,bmp085", }, + { .compatible = "bosch,bmp180", }, + { .compatible = "bosch,bmp181", }, + { .compatible = "bosch,bmp280", }, + { .compatible = "bosch,bme280", }, + { }, +}; +MODULE_DEVICE_TABLE(of, bmp280_of_spi_match); + +static const struct spi_device_id bmp280_spi_id[] = { + { "bmp180", BMP180_CHIP_ID }, + { "bmp181", BMP180_CHIP_ID }, + { "bmp280", BMP280_CHIP_ID }, + { "bme280", BME280_CHIP_ID }, + { } +}; +MODULE_DEVICE_TABLE(spi, bmp280_spi_id); + +static struct spi_driver bmp280_spi_driver = { + .driver = { + .name = "bmp280", + .of_match_table = bmp280_of_spi_match, + .pm = &bmp280_dev_pm_ops, + }, + .id_table = bmp280_spi_id, + .probe = bmp280_spi_probe, + .remove = bmp280_spi_remove, +}; +module_spi_driver(bmp280_spi_driver); + +MODULE_DESCRIPTION("BMP280 SPI bus driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/iio/pressure/bmp280.h b/drivers/iio/pressure/bmp280.h new file mode 100644 index 000000000000..2c770e13be0e --- /dev/null +++ b/drivers/iio/pressure/bmp280.h @@ -0,0 +1,112 @@ +#include <linux/bitops.h> +#include <linux/device.h> +#include <linux/regmap.h> + +/* BMP280 specific registers */ +#define BMP280_REG_HUMIDITY_LSB 0xFE +#define BMP280_REG_HUMIDITY_MSB 0xFD +#define BMP280_REG_TEMP_XLSB 0xFC +#define BMP280_REG_TEMP_LSB 0xFB +#define BMP280_REG_TEMP_MSB 0xFA +#define BMP280_REG_PRESS_XLSB 0xF9 +#define BMP280_REG_PRESS_LSB 0xF8 +#define BMP280_REG_PRESS_MSB 0xF7 + +#define BMP280_REG_CONFIG 0xF5 +#define BMP280_REG_CTRL_MEAS 0xF4 +#define BMP280_REG_STATUS 0xF3 +#define BMP280_REG_CTRL_HUMIDITY 0xF2 + +/* Due to non-linear mapping and data sizes, we can't do a bulk read */ +#define BMP280_REG_COMP_H1 0xA1 +#define BMP280_REG_COMP_H2 0xE1 +#define BMP280_REG_COMP_H3 0xE3 +#define BMP280_REG_COMP_H4 0xE4 +#define BMP280_REG_COMP_H5 0xE5 +#define BMP280_REG_COMP_H6 0xE7 + +#define BMP280_REG_COMP_TEMP_START 0x88 +#define BMP280_COMP_TEMP_REG_COUNT 6 + +#define BMP280_REG_COMP_PRESS_START 0x8E +#define BMP280_COMP_PRESS_REG_COUNT 18 + +#define BMP280_FILTER_MASK (BIT(4) | BIT(3) | BIT(2)) +#define BMP280_FILTER_OFF 0 +#define BMP280_FILTER_2X BIT(2) +#define BMP280_FILTER_4X BIT(3) +#define BMP280_FILTER_8X (BIT(3) | BIT(2)) +#define BMP280_FILTER_16X BIT(4) + +#define BMP280_OSRS_HUMIDITY_MASK (BIT(2) | BIT(1) | BIT(0)) +#define BMP280_OSRS_HUMIDITIY_X(osrs_h) ((osrs_h) << 0) +#define BMP280_OSRS_HUMIDITY_SKIP 0 +#define BMP280_OSRS_HUMIDITY_1X BMP280_OSRS_HUMIDITIY_X(1) +#define BMP280_OSRS_HUMIDITY_2X BMP280_OSRS_HUMIDITIY_X(2) +#define BMP280_OSRS_HUMIDITY_4X BMP280_OSRS_HUMIDITIY_X(3) +#define BMP280_OSRS_HUMIDITY_8X BMP280_OSRS_HUMIDITIY_X(4) +#define BMP280_OSRS_HUMIDITY_16X BMP280_OSRS_HUMIDITIY_X(5) + +#define BMP280_OSRS_TEMP_MASK (BIT(7) | BIT(6) | BIT(5)) +#define BMP280_OSRS_TEMP_SKIP 0 +#define BMP280_OSRS_TEMP_X(osrs_t) ((osrs_t) << 5) +#define BMP280_OSRS_TEMP_1X BMP280_OSRS_TEMP_X(1) +#define BMP280_OSRS_TEMP_2X BMP280_OSRS_TEMP_X(2) +#define BMP280_OSRS_TEMP_4X BMP280_OSRS_TEMP_X(3) +#define BMP280_OSRS_TEMP_8X BMP280_OSRS_TEMP_X(4) +#define BMP280_OSRS_TEMP_16X BMP280_OSRS_TEMP_X(5) + +#define BMP280_OSRS_PRESS_MASK (BIT(4) | BIT(3) | BIT(2)) +#define BMP280_OSRS_PRESS_SKIP 0 +#define BMP280_OSRS_PRESS_X(osrs_p) ((osrs_p) << 2) +#define BMP280_OSRS_PRESS_1X BMP280_OSRS_PRESS_X(1) +#define BMP280_OSRS_PRESS_2X BMP280_OSRS_PRESS_X(2) +#define BMP280_OSRS_PRESS_4X BMP280_OSRS_PRESS_X(3) +#define BMP280_OSRS_PRESS_8X BMP280_OSRS_PRESS_X(4) +#define BMP280_OSRS_PRESS_16X BMP280_OSRS_PRESS_X(5) + +#define BMP280_MODE_MASK (BIT(1) | BIT(0)) +#define 
BMP280_MODE_SLEEP 0 +#define BMP280_MODE_FORCED BIT(0) +#define BMP280_MODE_NORMAL (BIT(1) | BIT(0)) + +/* BMP180 specific registers */ +#define BMP180_REG_OUT_XLSB 0xF8 +#define BMP180_REG_OUT_LSB 0xF7 +#define BMP180_REG_OUT_MSB 0xF6 + +#define BMP180_REG_CALIB_START 0xAA +#define BMP180_REG_CALIB_COUNT 22 + +#define BMP180_MEAS_SCO BIT(5) +#define BMP180_MEAS_TEMP (0x0E | BMP180_MEAS_SCO) +#define BMP180_MEAS_PRESS_X(oss) ((oss) << 6 | 0x14 | BMP180_MEAS_SCO) +#define BMP180_MEAS_PRESS_1X BMP180_MEAS_PRESS_X(0) +#define BMP180_MEAS_PRESS_2X BMP180_MEAS_PRESS_X(1) +#define BMP180_MEAS_PRESS_4X BMP180_MEAS_PRESS_X(2) +#define BMP180_MEAS_PRESS_8X BMP180_MEAS_PRESS_X(3) + +/* BMP180 and BMP280 common registers */ +#define BMP280_REG_CTRL_MEAS 0xF4 +#define BMP280_REG_RESET 0xE0 +#define BMP280_REG_ID 0xD0 + +#define BMP180_CHIP_ID 0x55 +#define BMP280_CHIP_ID 0x58 +#define BME280_CHIP_ID 0x60 +#define BMP280_SOFT_RESET_VAL 0xB6 + +/* Regmap configurations */ +extern const struct regmap_config bmp180_regmap_config; +extern const struct regmap_config bmp280_regmap_config; + +/* Probe called from different transports */ +int bmp280_common_probe(struct device *dev, + struct regmap *regmap, + unsigned int chip, + const char *name, + int irq); +int bmp280_common_remove(struct device *dev); + +/* PM ops */ +extern const struct dev_pm_ops bmp280_dev_pm_ops; diff --git a/drivers/iio/pressure/hp206c.c b/drivers/iio/pressure/hp206c.c index 90f2b6e4a920..12f769e86355 100644 --- a/drivers/iio/pressure/hp206c.c +++ b/drivers/iio/pressure/hp206c.c @@ -401,6 +401,7 @@ static const struct i2c_device_id hp206c_id[] = { {"hp206c"}, {} }; +MODULE_DEVICE_TABLE(i2c, hp206c_id); #ifdef CONFIG_ACPI static const struct acpi_device_id hp206c_acpi_match[] = { diff --git a/drivers/iio/pressure/mpl3115.c b/drivers/iio/pressure/mpl3115.c index 01b2e0b18878..6392d7b62841 100644 --- a/drivers/iio/pressure/mpl3115.c +++ b/drivers/iio/pressure/mpl3115.c @@ -171,7 +171,7 @@ static irqreturn_t mpl3115_trigger_handler(int irq, void *p) mutex_unlock(&data->lock); iio_push_to_buffers_with_timestamp(indio_dev, buffer, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); done: iio_trigger_notify_done(indio_dev->trig); diff --git a/drivers/iio/pressure/ms5611_core.c b/drivers/iio/pressure/ms5611_core.c index 76578b07bb6e..feb41f82c64a 100644 --- a/drivers/iio/pressure/ms5611_core.c +++ b/drivers/iio/pressure/ms5611_core.c @@ -224,7 +224,8 @@ static irqreturn_t ms5611_trigger_handler(int irq, void *p) if (ret < 0) goto err; - iio_push_to_buffers_with_timestamp(indio_dev, buf, iio_get_time_ns()); + iio_push_to_buffers_with_timestamp(indio_dev, buf, + iio_get_time_ns(indio_dev)); err: iio_trigger_notify_done(indio_dev->trig); diff --git a/drivers/iio/pressure/ms5637.c b/drivers/iio/pressure/ms5637.c index e68052c118e6..953ffbc0ef96 100644 --- a/drivers/iio/pressure/ms5637.c +++ b/drivers/iio/pressure/ms5637.c @@ -1,6 +1,6 @@ /* - * ms5637.c - Support for Measurement-Specialties ms5637 and ms8607 - * pressure & temperature sensor + * ms5637.c - Support for Measurement-Specialties MS5637, MS5805 + * MS5837 and MS8607 pressure & temperature sensor * * Copyright (c) 2015 Measurement-Specialties * @@ -11,6 +11,10 @@ * Datasheet: * http://www.meas-spec.com/downloads/MS5637-02BA03.pdf * Datasheet: + * http://www.meas-spec.com/downloads/MS5805-02BA01.pdf + * Datasheet: + * http://www.meas-spec.com/downloads/MS5837-30BA.pdf + * Datasheet: * http://www.meas-spec.com/downloads/MS8607-02BA01.pdf */ @@ -170,9 +174,12 @@ static int 
ms5637_probe(struct i2c_client *client, static const struct i2c_device_id ms5637_id[] = { {"ms5637", 0}, - {"ms8607-temppressure", 1}, + {"ms5805", 0}, + {"ms5837", 0}, + {"ms8607-temppressure", 0}, {} }; +MODULE_DEVICE_TABLE(i2c, ms5637_id); static struct i2c_driver ms5637_driver = { .probe = ms5637_probe, diff --git a/drivers/iio/pressure/st_pressure.h b/drivers/iio/pressure/st_pressure.h index f5f41490060b..903a21e46874 100644 --- a/drivers/iio/pressure/st_pressure.h +++ b/drivers/iio/pressure/st_pressure.h @@ -17,6 +17,7 @@ #define LPS001WP_PRESS_DEV_NAME "lps001wp" #define LPS25H_PRESS_DEV_NAME "lps25h" #define LPS331AP_PRESS_DEV_NAME "lps331ap" +#define LPS22HB_PRESS_DEV_NAME "lps22hb" /** * struct st_sensors_platform_data - default press platform data diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index 92a118c3c4ac..55df9a75eb3a 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -28,6 +28,72 @@ #include #include "st_pressure.h" +/* + * About determining pressure scaling factors + * ------------------------------------------ + * + * Datasheets specify typical pressure sensitivity so that pressure is computed + * according to the following equation : + * pressure[mBar] = raw / sensitivity + * where : + * raw the 24 bits long raw sampled pressure + * sensitivity a scaling factor specified by the datasheet in LSB/mBar + * + * IIO ABI expects pressure to be expressed as kPascal, hence pressure should be + * computed according to : + * pressure[kPascal] = pressure[mBar] / 10 + * = raw / (sensitivity * 10) (1) + * + * Finally, st_press_read_raw() returns pressure scaling factor as an + * IIO_VAL_INT_PLUS_NANO with a zero integral part and "gain" as decimal part. + * Therefore, from (1), "gain" becomes : + * gain = 10^9 / (sensitivity * 10) + * = 10^8 / sensitivity + * + * About determining temperature scaling factors and offsets + * --------------------------------------------------------- + * + * Datasheets specify typical temperature sensitivity and offset so that + * temperature is computed according to the following equation : + * temp[Celsius] = offset[Celsius] + (raw / sensitivity) + * where : + * raw the 16 bits long raw sampled temperature + * offset a constant specified by the datasheet in degree Celsius + * (sometimes zero) + * sensitivity a scaling factor specified by the datasheet in LSB/Celsius + * + * IIO ABI expects temperature to be expressed as milli degree Celsius such as + * user space should compute temperature according to : + * temp[mCelsius] = temp[Celsius] * 10^3 + * = (offset[Celsius] + (raw / sensitivity)) * 10^3 + * = ((offset[Celsius] * sensitivity) + raw) * + * (10^3 / sensitivity) (2) + * + * IIO ABI expects user space to apply offset and scaling factors to raw samples + * according to : + * temp[mCelsius] = (OFFSET + raw) * SCALE + * where : + * OFFSET an arbitrary constant exposed by device + * SCALE an arbitrary scaling factor exposed by device + * + * Matching OFFSET and SCALE with members of (2) gives : + * OFFSET = offset[Celsius] * sensitivity (3) + * SCALE = 10^3 / sensitivity (4) + * + * st_press_read_raw() returns temperature scaling factor as an + * IIO_VAL_FRACTIONAL with a 10^3 numerator and "gain2" as denominator. + * Therefore, from (3), "gain2" becomes : + * gain2 = sensitivity + * + * When declared within channel, i.e. 
for a non zero specified offset, + * st_press_read_raw() will return the latter as an IIO_VAL_FRACTIONAL such as : + * numerator = OFFSET * 10^3 + * denominator = 10^3 + * giving from (4): + * numerator = offset[Celsius] * 10^3 * sensitivity + * = offset[mCelsius] * gain2 + */ + #define MCELSIUS_PER_CELSIUS 1000 /* Default pressure sensitivity */ @@ -39,8 +105,6 @@ #define ST_PRESS_LSB_PER_CELSIUS 480UL #define ST_PRESS_MILLI_CELSIUS_OFFSET 42500UL -#define ST_PRESS_NUMBER_DATA_CHANNELS 1 - /* FULLSCALE */ #define ST_PRESS_FS_AVL_1100MB 1100 #define ST_PRESS_FS_AVL_1260MB 1260 @@ -48,7 +112,11 @@ #define ST_PRESS_1_OUT_XL_ADDR 0x28 #define ST_TEMP_1_OUT_L_ADDR 0x2b -/* CUSTOM VALUES FOR LPS331AP SENSOR */ +/* + * CUSTOM VALUES FOR LPS331AP SENSOR + * See LPS331AP datasheet: + * http://www2.st.com/resource/en/datasheet/lps331ap.pdf + */ #define ST_PRESS_LPS331AP_WAI_EXP 0xbb #define ST_PRESS_LPS331AP_ODR_ADDR 0x20 #define ST_PRESS_LPS331AP_ODR_MASK 0x70 @@ -71,7 +139,9 @@ #define ST_PRESS_LPS331AP_OD_IRQ_MASK 0x40 #define ST_PRESS_LPS331AP_MULTIREAD_BIT true -/* CUSTOM VALUES FOR LPS001WP SENSOR */ +/* + * CUSTOM VALUES FOR THE OBSOLETE LPS001WP SENSOR + */ /* LPS001WP pressure resolution */ #define ST_PRESS_LPS001WP_LSB_PER_MBAR 16UL @@ -94,7 +164,11 @@ #define ST_PRESS_LPS001WP_OUT_L_ADDR 0x28 #define ST_TEMP_LPS001WP_OUT_L_ADDR 0x2a -/* CUSTOM VALUES FOR LPS25H SENSOR */ +/* + * CUSTOM VALUES FOR LPS25H SENSOR + * See LPS25H datasheet: + * http://www2.st.com/resource/en/datasheet/lps25h.pdf + */ #define ST_PRESS_LPS25H_WAI_EXP 0xbd #define ST_PRESS_LPS25H_ODR_ADDR 0x20 #define ST_PRESS_LPS25H_ODR_MASK 0x70 @@ -117,27 +191,54 @@ #define ST_PRESS_LPS25H_OUT_XL_ADDR 0x28 #define ST_TEMP_LPS25H_OUT_L_ADDR 0x2b +/* + * CUSTOM VALUES FOR LPS22HB SENSOR + * See LPS22HB datasheet: + * http://www2.st.com/resource/en/datasheet/lps22hb.pdf + */ + +/* LPS22HB temperature sensitivity */ +#define ST_PRESS_LPS22HB_LSB_PER_CELSIUS 100UL + +#define ST_PRESS_LPS22HB_WAI_EXP 0xb1 +#define ST_PRESS_LPS22HB_ODR_ADDR 0x10 +#define ST_PRESS_LPS22HB_ODR_MASK 0x70 +#define ST_PRESS_LPS22HB_ODR_AVL_1HZ_VAL 0x01 +#define ST_PRESS_LPS22HB_ODR_AVL_10HZ_VAL 0x02 +#define ST_PRESS_LPS22HB_ODR_AVL_25HZ_VAL 0x03 +#define ST_PRESS_LPS22HB_ODR_AVL_50HZ_VAL 0x04 +#define ST_PRESS_LPS22HB_ODR_AVL_75HZ_VAL 0x05 +#define ST_PRESS_LPS22HB_PW_ADDR 0x10 +#define ST_PRESS_LPS22HB_PW_MASK 0x70 +#define ST_PRESS_LPS22HB_BDU_ADDR 0x10 +#define ST_PRESS_LPS22HB_BDU_MASK 0x02 +#define ST_PRESS_LPS22HB_DRDY_IRQ_ADDR 0x12 +#define ST_PRESS_LPS22HB_DRDY_IRQ_INT1_MASK 0x04 +#define ST_PRESS_LPS22HB_DRDY_IRQ_INT2_MASK 0x08 +#define ST_PRESS_LPS22HB_IHL_IRQ_ADDR 0x12 +#define ST_PRESS_LPS22HB_IHL_IRQ_MASK 0x80 +#define ST_PRESS_LPS22HB_OD_IRQ_ADDR 0x12 +#define ST_PRESS_LPS22HB_OD_IRQ_MASK 0x40 +#define ST_PRESS_LPS22HB_MULTIREAD_BIT true + static const struct iio_chan_spec st_press_1_channels[] = { { .type = IIO_PRESSURE, - .channel2 = IIO_NO_MOD, .address = ST_PRESS_1_OUT_XL_ADDR, - .scan_index = ST_SENSORS_SCAN_X, + .scan_index = 0, .scan_type = { .sign = 'u', .realbits = 24, - .storagebits = 24, + .storagebits = 32, .endianness = IIO_LE, }, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), - .modified = 0, }, { .type = IIO_TEMP, - .channel2 = IIO_NO_MOD, .address = ST_TEMP_1_OUT_L_ADDR, - .scan_index = -1, + .scan_index = 1, .scan_type = { .sign = 'u', .realbits = 16, @@ -148,17 +249,15 @@ static const struct iio_chan_spec st_press_1_channels[] = { BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE) | 
BIT(IIO_CHAN_INFO_OFFSET), - .modified = 0, }, - IIO_CHAN_SOFT_TIMESTAMP(1) + IIO_CHAN_SOFT_TIMESTAMP(2) }; static const struct iio_chan_spec st_press_lps001wp_channels[] = { { .type = IIO_PRESSURE, - .channel2 = IIO_NO_MOD, .address = ST_PRESS_LPS001WP_OUT_L_ADDR, - .scan_index = ST_SENSORS_SCAN_X, + .scan_index = 0, .scan_type = { .sign = 'u', .realbits = 16, @@ -168,13 +267,11 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = { .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), - .modified = 0, }, { .type = IIO_TEMP, - .channel2 = IIO_NO_MOD, .address = ST_TEMP_LPS001WP_OUT_L_ADDR, - .scan_index = -1, + .scan_index = 1, .scan_type = { .sign = 'u', .realbits = 16, @@ -184,9 +281,42 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = { .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), - .modified = 0, }, - IIO_CHAN_SOFT_TIMESTAMP(1) + IIO_CHAN_SOFT_TIMESTAMP(2) +}; + +static const struct iio_chan_spec st_press_lps22hb_channels[] = { + { + .type = IIO_PRESSURE, + .address = ST_PRESS_1_OUT_XL_ADDR, + .scan_index = 0, + .scan_type = { + .sign = 'u', + .realbits = 24, + .storagebits = 32, + .endianness = IIO_LE, + }, + .info_mask_separate = + BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_SCALE), + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), + }, + { + .type = IIO_TEMP, + .address = ST_TEMP_1_OUT_L_ADDR, + .scan_index = 1, + .scan_type = { + .sign = 's', + .realbits = 16, + .storagebits = 16, + .endianness = IIO_LE, + }, + .info_mask_separate = + BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_SCALE), + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), + }, + IIO_CHAN_SOFT_TIMESTAMP(2) }; static const struct st_sensor_settings st_press_sensors_settings[] = { @@ -346,6 +476,59 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { .multi_read_bit = ST_PRESS_LPS25H_MULTIREAD_BIT, .bootime = 2, }, + { + .wai = ST_PRESS_LPS22HB_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, + .sensors_supported = { + [0] = LPS22HB_PRESS_DEV_NAME, + }, + .ch = (struct iio_chan_spec *)st_press_lps22hb_channels, + .num_ch = ARRAY_SIZE(st_press_lps22hb_channels), + .odr = { + .addr = ST_PRESS_LPS22HB_ODR_ADDR, + .mask = ST_PRESS_LPS22HB_ODR_MASK, + .odr_avl = { + { 1, ST_PRESS_LPS22HB_ODR_AVL_1HZ_VAL, }, + { 10, ST_PRESS_LPS22HB_ODR_AVL_10HZ_VAL, }, + { 25, ST_PRESS_LPS22HB_ODR_AVL_25HZ_VAL, }, + { 50, ST_PRESS_LPS22HB_ODR_AVL_50HZ_VAL, }, + { 75, ST_PRESS_LPS22HB_ODR_AVL_75HZ_VAL, }, + }, + }, + .pw = { + .addr = ST_PRESS_LPS22HB_PW_ADDR, + .mask = ST_PRESS_LPS22HB_PW_MASK, + .value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE, + }, + .fs = { + .fs_avl = { + /* + * Pressure and temperature sensitivity values + * as defined in table 3 of LPS22HB datasheet. 
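+ * + * As a worked example of the scale derivation at the top of this + * file (a sketch only, assuming the default pressure sensitivity of + * 4096 LSB/mbar, i.e. ST_PRESS_LSB_PER_MBAR): gain = 10^8 / 4096 = + * 24414, so one raw pressure LSB corresponds to 24414e-9 kPa. + * Likewise, gain2 = ST_PRESS_LPS22HB_LSB_PER_CELSIUS = 100 gives + * SCALE = 10^3 / 100, i.e. 10 milli degrees Celsius per raw LSB.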
+ */ + [0] = { + .num = ST_PRESS_FS_AVL_1260MB, + .gain = ST_PRESS_KPASCAL_NANO_SCALE, + .gain2 = ST_PRESS_LPS22HB_LSB_PER_CELSIUS, + }, + }, + }, + .bdu = { + .addr = ST_PRESS_LPS22HB_BDU_ADDR, + .mask = ST_PRESS_LPS22HB_BDU_MASK, + }, + .drdy_irq = { + .addr = ST_PRESS_LPS22HB_DRDY_IRQ_ADDR, + .mask_int1 = ST_PRESS_LPS22HB_DRDY_IRQ_INT1_MASK, + .mask_int2 = ST_PRESS_LPS22HB_DRDY_IRQ_INT2_MASK, + .addr_ihl = ST_PRESS_LPS22HB_IHL_IRQ_ADDR, + .mask_ihl = ST_PRESS_LPS22HB_IHL_IRQ_MASK, + .addr_od = ST_PRESS_LPS22HB_OD_IRQ_ADDR, + .mask_od = ST_PRESS_LPS22HB_OD_IRQ_MASK, + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, + }, + .multi_read_bit = ST_PRESS_LPS22HB_MULTIREAD_BIT, + }, }; static int st_press_write_raw(struct iio_dev *indio_dev, @@ -462,23 +645,30 @@ int st_press_common_probe(struct iio_dev *indio_dev) indio_dev->info = &press_info; mutex_init(&press_data->tb.buf_lock); - st_sensors_power_enable(indio_dev); + err = st_sensors_power_enable(indio_dev); + if (err) + return err; err = st_sensors_check_device_support(indio_dev, ARRAY_SIZE(st_press_sensors_settings), st_press_sensors_settings); if (err < 0) - return err; - - press_data->num_data_channels = ST_PRESS_NUMBER_DATA_CHANNELS; + goto st_press_power_off; + + /* + * Skip timestamping channel while declaring available channels to + * common st_sensor layer. Look at st_sensors_get_buffer_element() to + * see how timestamps are explicitly pushed as last samples block + * element. + */ + press_data->num_data_channels = press_data->sensor_settings->num_ch - 1; press_data->multiread_bit = press_data->sensor_settings->multi_read_bit; indio_dev->channels = press_data->sensor_settings->ch; indio_dev->num_channels = press_data->sensor_settings->num_ch; - if (press_data->sensor_settings->fs.addr != 0) - press_data->current_fullscale = - (struct st_sensor_fullscale_avl *) - &press_data->sensor_settings->fs.fs_avl[0]; + press_data->current_fullscale = + (struct st_sensor_fullscale_avl *) + &press_data->sensor_settings->fs.fs_avl[0]; press_data->odr = press_data->sensor_settings->odr.odr_avl[0].hz; @@ -490,11 +680,11 @@ int st_press_common_probe(struct iio_dev *indio_dev) err = st_sensors_init_sensor(indio_dev, press_data->dev->platform_data); if (err < 0) - return err; + goto st_press_power_off; err = st_press_allocate_ring(indio_dev); if (err < 0) - return err; + goto st_press_power_off; if (irq > 0) { err = st_sensors_allocate_trigger(indio_dev, @@ -517,6 +707,8 @@ st_press_device_register_error: st_sensors_deallocate_trigger(indio_dev); st_press_probe_trigger_error: st_press_deallocate_ring(indio_dev); +st_press_power_off: + st_sensors_power_disable(indio_dev); return err; } diff --git a/drivers/iio/pressure/st_pressure_i2c.c b/drivers/iio/pressure/st_pressure_i2c.c index 8fcf9766eaec..ed18701c68c9 100644 --- a/drivers/iio/pressure/st_pressure_i2c.c +++ b/drivers/iio/pressure/st_pressure_i2c.c @@ -32,6 +32,10 @@ static const struct of_device_id st_press_of_match[] = { .compatible = "st,lps331ap-press", .data = LPS331AP_PRESS_DEV_NAME, }, + { + .compatible = "st,lps22hb-press", + .data = LPS22HB_PRESS_DEV_NAME, + }, {}, }; MODULE_DEVICE_TABLE(of, st_press_of_match); diff --git a/drivers/iio/pressure/st_pressure_spi.c b/drivers/iio/pressure/st_pressure_spi.c index 40c0692ff1de..550508025af1 100644 --- a/drivers/iio/pressure/st_pressure_spi.c +++ b/drivers/iio/pressure/st_pressure_spi.c @@ -50,6 +50,7 @@ static const struct spi_device_id st_press_id_table[] = { { LPS001WP_PRESS_DEV_NAME }, { LPS25H_PRESS_DEV_NAME }, { LPS331AP_PRESS_DEV_NAME 
}, + { LPS22HB_PRESS_DEV_NAME }, {}, }; MODULE_DEVICE_TABLE(spi, st_press_id_table); diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c index e2f926cdcad2..2e3a70e1b245 100644 --- a/drivers/iio/proximity/as3935.c +++ b/drivers/iio/proximity/as3935.c @@ -231,10 +231,16 @@ static void as3935_event_work(struct work_struct *work) { struct as3935_state *st; int val; + int ret; st = container_of(work, struct as3935_state, work.work); - as3935_read(st, AS3935_INT, &val); + ret = as3935_read(st, AS3935_INT, &val); + if (ret) { + dev_warn(&st->spi->dev, "read error\n"); + return; + } + val &= AS3935_INT_MASK; switch (val) { @@ -242,7 +248,7 @@ static void as3935_event_work(struct work_struct *work) iio_trigger_poll(st->trig); break; case AS3935_NOISE_INT: - dev_warn(&st->spi->dev, "noise level is too high"); + dev_warn(&st->spi->dev, "noise level is too high\n"); break; } } @@ -346,7 +352,6 @@ static int as3935_probe(struct spi_device *spi) st = iio_priv(indio_dev); st->spi = spi; - st->tune_cap = 0; spi_set_drvdata(spi, indio_dev); mutex_init(&st->lock); @@ -468,4 +473,3 @@ module_spi_driver(as3935_driver); MODULE_AUTHOR("Matt Ranostay "); MODULE_DESCRIPTION("AS3935 lightning sensor"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("spi:as3935"); diff --git a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c index 4f502386aa86..3141c3c161bb 100644 --- a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c +++ b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c @@ -203,22 +203,19 @@ static int lidar_read_raw(struct iio_dev *indio_dev, struct lidar_data *data = iio_priv(indio_dev); int ret = -EINVAL; - mutex_lock(&indio_dev->mlock); - - if (iio_buffer_enabled(indio_dev) && mask == IIO_CHAN_INFO_RAW) { - ret = -EBUSY; - goto error_busy; - } - switch (mask) { case IIO_CHAN_INFO_RAW: { u16 reg; + if (iio_device_claim_direct_mode(indio_dev)) + return -EBUSY; + ret = lidar_get_measurement(data, ®); if (!ret) { *val = reg; ret = IIO_VAL_INT; } + iio_device_release_direct_mode(indio_dev); break; } case IIO_CHAN_INFO_SCALE: @@ -228,9 +225,6 @@ static int lidar_read_raw(struct iio_dev *indio_dev, break; } -error_busy: - mutex_unlock(&indio_dev->mlock); - return ret; } @@ -244,7 +238,7 @@ static irqreturn_t lidar_trigger_handler(int irq, void *private) ret = lidar_get_measurement(data, data->buffer); if (!ret) { iio_push_to_buffers_with_timestamp(indio_dev, data->buffer, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); } else if (ret != -EINVAL) { dev_err(&data->client->dev, "cannot read LIDAR measurement"); } diff --git a/drivers/iio/proximity/sx9500.c b/drivers/iio/proximity/sx9500.c index 66cd09a18786..1d74b3aafeed 100644 --- a/drivers/iio/proximity/sx9500.c +++ b/drivers/iio/proximity/sx9500.c @@ -492,7 +492,7 @@ static void sx9500_push_events(struct iio_dev *indio_dev) dir = new_prox ? 
IIO_EV_DIR_FALLING : IIO_EV_DIR_RISING; ev = IIO_UNMOD_EVENT_CODE(IIO_PROXIMITY, chan, IIO_EV_TYPE_THRESH, dir); - iio_push_event(indio_dev, ev, iio_get_time_ns()); + iio_push_event(indio_dev, ev, iio_get_time_ns(indio_dev)); data->prox_stat[chan] = new_prox; } } @@ -669,7 +669,7 @@ static irqreturn_t sx9500_trigger_handler(int irq, void *private) } iio_push_to_buffers_with_timestamp(indio_dev, data->buffer, - iio_get_time_ns()); + iio_get_time_ns(indio_dev)); out: mutex_unlock(&data->mutex); diff --git a/drivers/iio/temperature/tsys02d.c b/drivers/iio/temperature/tsys02d.c index ab6fe8f6f2d1..c0a19a000387 100644 --- a/drivers/iio/temperature/tsys02d.c +++ b/drivers/iio/temperature/tsys02d.c @@ -174,6 +174,7 @@ static const struct i2c_device_id tsys02d_id[] = { {"tsys02d", 0}, {} }; +MODULE_DEVICE_TABLE(i2c, tsys02d_id); static struct i2c_driver tsys02d_driver = { .probe = tsys02d_probe, diff --git a/drivers/iio/trigger/Kconfig b/drivers/iio/trigger/Kconfig index 519e6772f6f5..809b2e7d58fa 100644 --- a/drivers/iio/trigger/Kconfig +++ b/drivers/iio/trigger/Kconfig @@ -24,6 +24,18 @@ config IIO_INTERRUPT_TRIGGER To compile this driver as a module, choose M here: the module will be called iio-trig-interrupt. +config IIO_TIGHTLOOP_TRIGGER + tristate "A kthread based hammering loop trigger" + depends on IIO_SW_TRIGGER + help + An experimental trigger, used to allow sensors to be sampled as fast + as possible under the limitations of whatever else is going on. + Uses a tight loop in a kthread. Will only work with lower half only + trigger consumers. + + To compile this driver as a module, choose M here: the + module will be called iio-trig-loop. + config IIO_SYSFS_TRIGGER tristate "SYSFS trigger" depends on SYSFS diff --git a/drivers/iio/trigger/Makefile b/drivers/iio/trigger/Makefile index fe06eb564367..aab4dc23303d 100644 --- a/drivers/iio/trigger/Makefile +++ b/drivers/iio/trigger/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_IIO_HRTIMER_TRIGGER) += iio-trig-hrtimer.o obj-$(CONFIG_IIO_INTERRUPT_TRIGGER) += iio-trig-interrupt.o obj-$(CONFIG_IIO_SYSFS_TRIGGER) += iio-trig-sysfs.o +obj-$(CONFIG_IIO_TIGHTLOOP_TRIGGER) += iio-trig-loop.o diff --git a/drivers/iio/trigger/iio-trig-loop.c b/drivers/iio/trigger/iio-trig-loop.c new file mode 100644 index 000000000000..dc6be28f96fe --- /dev/null +++ b/drivers/iio/trigger/iio-trig-loop.c @@ -0,0 +1,143 @@ +/* + * Copyright 2016 Jonathan Cameron + * + * Licensed under the GPL-2. + * + * Based on a mashup of the hrtimer trigger and continuous sampling proposal of + * Gregor Boirie + * + * Note this is still rather experimental and may eat babies. + * + * Todo + * * Protect against connection of devices that 'need' the top half + * handler. + * * Work out how to run top half handlers in this context if it is + * safe to do so (timestamp grabbing for example) + * + * Tested against a max1363. Used about 33% cpu for the thread and 20% + * for generic_buffer piping to /dev/null. Watermark set at 64 on a 128 + * element kfifo buffer. 
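+ * + * Usage sketch (assuming the usual IIO software trigger configfs + * layout, as for the hrtimer trigger; the instance name below is + * arbitrary): + * mkdir /sys/kernel/config/iio/triggers/loop/instance0 + * then write "instance0" to the consumer device's + * trigger/current_trigger and enable its buffer; the kthread then + * polls the device in a tight loop until the buffer is disabled.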
+ */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/irq_work.h> +#include <linux/kthread.h> +#include <linux/freezer.h> +#include <linux/configfs.h> + +#include <linux/iio/iio.h> +#include <linux/iio/trigger.h> +#include <linux/iio/sw_trigger.h> + +struct iio_loop_info { + struct iio_sw_trigger swt; + struct task_struct *task; +}; + +static struct config_item_type iio_loop_type = { + .ct_owner = THIS_MODULE, +}; + +static int iio_loop_thread(void *data) +{ + struct iio_trigger *trig = data; + + set_freezable(); + + do { + iio_trigger_poll_chained(trig); + } while (likely(!kthread_freezable_should_stop(NULL))); + + return 0; +} + +static int iio_loop_trigger_set_state(struct iio_trigger *trig, bool state) +{ + struct iio_loop_info *loop_trig = iio_trigger_get_drvdata(trig); + + if (state) { + loop_trig->task = kthread_run(iio_loop_thread, + trig, trig->name); + if (unlikely(IS_ERR(loop_trig->task))) { + dev_err(&trig->dev, + "failed to create trigger loop thread\n"); + return PTR_ERR(loop_trig->task); + } + } else { + kthread_stop(loop_trig->task); + } + + return 0; +} + +static const struct iio_trigger_ops iio_loop_trigger_ops = { + .set_trigger_state = iio_loop_trigger_set_state, + .owner = THIS_MODULE, +}; + +static struct iio_sw_trigger *iio_trig_loop_probe(const char *name) +{ + struct iio_loop_info *trig_info; + int ret; + + trig_info = kzalloc(sizeof(*trig_info), GFP_KERNEL); + if (!trig_info) + return ERR_PTR(-ENOMEM); + + trig_info->swt.trigger = iio_trigger_alloc("%s", name); + if (!trig_info->swt.trigger) { + ret = -ENOMEM; + goto err_free_trig_info; + } + + iio_trigger_set_drvdata(trig_info->swt.trigger, trig_info); + trig_info->swt.trigger->ops = &iio_loop_trigger_ops; + + ret = iio_trigger_register(trig_info->swt.trigger); + if (ret) + goto err_free_trigger; + + iio_swt_group_init_type_name(&trig_info->swt, name, &iio_loop_type); + + return &trig_info->swt; + +err_free_trigger: + iio_trigger_free(trig_info->swt.trigger); +err_free_trig_info: + kfree(trig_info); + + return ERR_PTR(ret); +} + +static int iio_trig_loop_remove(struct iio_sw_trigger *swt) +{ + struct iio_loop_info *trig_info; + + trig_info = iio_trigger_get_drvdata(swt->trigger); + + iio_trigger_unregister(swt->trigger); + iio_trigger_free(swt->trigger); + kfree(trig_info); + + return 0; +} + +static const struct iio_sw_trigger_ops iio_trig_loop_ops = { + .probe = iio_trig_loop_probe, + .remove = iio_trig_loop_remove, +}; + +static struct iio_sw_trigger_type iio_trig_loop = { + .name = "loop", + .owner = THIS_MODULE, + .ops = &iio_trig_loop_ops, +}; + +module_iio_sw_trigger_driver(iio_trig_loop); + +MODULE_AUTHOR("Jonathan Cameron "); +MODULE_DESCRIPTION("Loop based trigger for the iio subsystem"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:iio-trig-loop"); diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 040966775f40..1a2984c28b95 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -411,7 +411,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, for (ix = 0; ix < table->sz; ix++) if (table->data_vec[ix].attr.ndev == ndev) - if (!del_gid(ib_dev, port, table, ix, false)) + if (!del_gid(ib_dev, port, table, ix, + !!(table->data_vec[ix].props & + GID_TABLE_ENTRY_DEFAULT))) deleted = true; write_unlock_irq(&table->rwlock); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f0c91ba3178a..ad1b1adcf6f0 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -708,17 +708,6 @@ static void cma_deref_id(struct rdma_id_private *id_priv) complete(&id_priv->comp); } -static int cma_disable_callback(struct 
rdma_id_private *id_priv, - enum rdma_cm_state state) -{ - mutex_lock(&id_priv->handler_mutex); - if (id_priv->state != state) { - mutex_unlock(&id_priv->handler_mutex); - return -EINVAL; - } - return 0; -} - struct rdma_cm_id *rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, @@ -1671,11 +1660,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_cm_event event; int ret = 0; + mutex_lock(&id_priv->handler_mutex); if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && - cma_disable_callback(id_priv, RDMA_CM_CONNECT)) || + id_priv->state != RDMA_CM_CONNECT) || (ib_event->event == IB_CM_TIMEWAIT_EXIT && - cma_disable_callback(id_priv, RDMA_CM_DISCONNECT))) - return 0; + id_priv->state != RDMA_CM_DISCONNECT)) + goto out; memset(&event, 0, sizeof event); switch (ib_event->event) { @@ -1870,7 +1860,7 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { - struct rdma_id_private *listen_id, *conn_id; + struct rdma_id_private *listen_id, *conn_id = NULL; struct rdma_cm_event event; struct net_device *net_dev; int offset, ret; @@ -1884,9 +1874,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) goto net_dev_put; } - if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) { + mutex_lock(&listen_id->handler_mutex); + if (listen_id->state != RDMA_CM_LISTEN) { ret = -ECONNABORTED; - goto net_dev_put; + goto err1; } memset(&event, 0, sizeof event); @@ -1976,8 +1967,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; - if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) - return 0; + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != RDMA_CM_CONNECT) + goto out; memset(&event, 0, sizeof event); switch (iw_event->event) { @@ -2029,6 +2021,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) return ret; } +out: mutex_unlock(&id_priv->handler_mutex); return ret; } @@ -2039,13 +2032,15 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event; - int ret; + int ret = -ECONNABORTED; struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; listen_id = cm_id->context; - if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) - return -ECONNABORTED; + + mutex_lock(&listen_id->handler_mutex); + if (listen_id->state != RDMA_CM_LISTEN) + goto out; /* Create a new RDMA id for the new IW CM ID */ new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, @@ -3216,8 +3211,9 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; - if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) - return 0; + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != RDMA_CM_CONNECT) + goto out; memset(&event, 0, sizeof event); switch (ib_event->event) { @@ -3673,12 +3669,13 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) struct rdma_id_private *id_priv; struct cma_multicast *mc = multicast->context; struct rdma_cm_event event; - int ret; + int ret = 0; id_priv = mc->id_priv; - if 
(cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) && - cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED)) - return 0; + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != RDMA_CM_ADDR_BOUND && + id_priv->state != RDMA_CM_ADDR_RESOLVED) + goto out; if (!status) status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); @@ -3720,6 +3717,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) return 0; } +out: mutex_unlock(&id_priv->handler_mutex); return 0; } @@ -3878,12 +3876,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; if (addr->sa_family == AF_INET) { - if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) + if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, true); - if (!err) { - mc->igmp_joined = true; - mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; + if (!err) + mc->igmp_joined = true; } } else { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index a5793c8f1590..60df4f8e81be 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -530,6 +530,7 @@ static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192); static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224); static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256); static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288); +static PORT_PMA_ATTR(port_xmit_wait , 0, 32, 320); /* * Counters added by extended set @@ -560,6 +561,7 @@ static struct attribute *pma_attrs[] = { &port_pma_attr_port_rcv_data.attr.attr, &port_pma_attr_port_xmit_packets.attr.attr, &port_pma_attr_port_rcv_packets.attr.attr, + &port_pma_attr_port_xmit_wait.attr.attr, NULL }; @@ -579,6 +581,7 @@ static struct attribute *pma_attrs_ext[] = { &port_pma_attr_ext_port_xmit_data.attr.attr, &port_pma_attr_ext_port_rcv_data.attr.attr, &port_pma_attr_ext_port_xmit_packets.attr.attr, + &port_pma_attr_port_xmit_wait.attr.attr, &port_pma_attr_ext_port_rcv_packets.attr.attr, &port_pma_attr_ext_unicast_rcv_packets.attr.attr, &port_pma_attr_ext_unicast_xmit_packets.attr.attr, @@ -604,6 +607,7 @@ static struct attribute *pma_attrs_noietf[] = { &port_pma_attr_ext_port_rcv_data.attr.attr, &port_pma_attr_ext_port_xmit_packets.attr.attr, &port_pma_attr_ext_port_rcv_packets.attr.attr, + &port_pma_attr_port_xmit_wait.attr.attr, NULL }; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 1a8babb8ee3c..825021d1008b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1747,7 +1747,7 @@ static int create_qp(struct ib_uverbs_file *file, struct ib_srq *srq = NULL; struct ib_qp *qp; char *buf; - struct ib_qp_init_attr attr; + struct ib_qp_init_attr attr = {}; struct ib_uverbs_ex_create_qp_resp resp; int ret; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 1d7d4cf442e3..6298f54b4137 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -511,12 +511,16 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, ah_attr->grh.dgid = sgid; if (!rdma_cap_eth_ah(device, port_num)) { - ret = ib_find_cached_gid_by_port(device, &dgid, - IB_GID_TYPE_IB, - port_num, NULL, - &gid_index); - if (ret) - return ret; + if (dgid.global.interface_id != 
cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { + ret = ib_find_cached_gid_by_port(device, &dgid, + IB_GID_TYPE_IB, + port_num, NULL, + &gid_index); + if (ret) + return ret; + } else { + gid_index = 0; + } } ah_attr->grh.sgid_index = (u8) gid_index; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 81619fbb5842..dad4d0ebbdff 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1037,7 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *); static void dc_start(struct hfi1_devdata *); static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); -static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd); +static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); /* * Error interrupt table entry. This is used as input to the interrupt @@ -6962,8 +6962,6 @@ void handle_link_down(struct work_struct *work) } reset_neighbor_info(ppd); - if (ppd->mgmt_allowed) - remove_full_mgmt_pkey(ppd); /* disable the port */ clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); @@ -7070,12 +7068,16 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) __func__, ppd->pkeys[2], FULL_MGMT_P_KEY); ppd->pkeys[2] = FULL_MGMT_P_KEY; (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + hfi1_event_pkey_change(ppd->dd, ppd->port); } -static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd) +static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd) { - ppd->pkeys[2] = 0; - (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + if (ppd->pkeys[2] != 0) { + ppd->pkeys[2] = 0; + (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + hfi1_event_pkey_change(ppd->dd, ppd->port); + } } /* @@ -9168,6 +9170,13 @@ int start_link(struct hfi1_pportdata *ppd) return 0; } + /* + * FULL_MGMT_P_KEY is cleared from the pkey table, so that the + * pkey table can be configured properly if the HFI unit is connected + * to switch port with MgmtAllowed=NO + */ + clear_full_mgmt_pkey(ppd); + return set_link_state(ppd, HLS_DN_POLL); } @@ -9777,7 +9786,7 @@ static void set_send_length(struct hfi1_pportdata *ppd) u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2) & SEND_LEN_CHECK1_LEN_VL15_MASK) << SEND_LEN_CHECK1_LEN_VL15_SHIFT; - int i; + int i, j; u32 thres; for (i = 0; i < ppd->vls_supported; i++) { @@ -9801,7 +9810,10 @@ static void set_send_length(struct hfi1_pportdata *ppd) sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu, dd->rcd[0]->rcvhdrqentsize)); - sc_set_cr_threshold(dd->vld[i].sc, thres); + for (j = 0; j < INIT_SC_PER_VL; j++) + sc_set_cr_threshold( + pio_select_send_context_vl(dd, j, i), + thres); } thres = min(sc_percent_to_threshold(dd->vld[15].sc, 50), sc_mtu_to_threshold(dd->vld[15].sc, @@ -14101,8 +14113,14 @@ static int init_asic_data(struct hfi1_devdata *dd) { unsigned long flags; struct hfi1_devdata *tmp, *peer = NULL; + struct hfi1_asic_data *asic_data; int ret = 0; + /* pre-allocate the asic structure in case we are the first device */ + asic_data = kzalloc(sizeof(*dd->asic_data), GFP_KERNEL); + if (!asic_data) + return -ENOMEM; + spin_lock_irqsave(&hfi1_devs_lock, flags); /* Find our peer device */ list_for_each_entry(tmp, &hfi1_dev_list, list) { @@ -14114,18 +14132,14 @@ static int init_asic_data(struct hfi1_devdata *dd) } if (peer) { + /* use already allocated structure */ dd->asic_data = peer->asic_data; + kfree(asic_data); } else { - dd->asic_data = kzalloc(sizeof(*dd->asic_data), GFP_KERNEL); - if (!dd->asic_data) { - ret = -ENOMEM; - goto done; - } + dd->asic_data = asic_data; 
mutex_init(&dd->asic_data->asic_resource_mutex); } dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */ - -done: spin_unlock_irqrestore(&hfi1_devs_lock, flags); return ret; } diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7a5b0e676cc7..c702a009608f 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -203,6 +203,9 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, switch (cmd) { case HFI1_IOCTL_ASSIGN_CTXT: + if (uctxt) + return -EINVAL; + if (copy_from_user(&uinfo, (struct hfi1_user_info __user *)arg, sizeof(uinfo))) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 0d28a5a40fae..eed971ccd2a1 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1383,7 +1383,7 @@ static void postinit_cleanup(struct hfi1_devdata *dd) static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret = 0, j, pidx, initfail; - struct hfi1_devdata *dd = NULL; + struct hfi1_devdata *dd = ERR_PTR(-EINVAL); struct hfi1_pportdata *ppd; /* First, lock the non-writable module parameters */ diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 219029576ba0..fca07a1d6c28 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -78,6 +78,16 @@ static inline void clear_opa_smp_data(struct opa_smp *smp) memset(data, 0, size); } +void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port) +{ + struct ib_event event; + + event.event = IB_EVENT_PKEY_CHANGE; + event.device = &dd->verbs_dev.rdi.ibdev; + event.element.port_num = port; + ib_dispatch_event(&event); +} + static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) { struct ib_mad_send_buf *send_buf; @@ -1418,15 +1428,10 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) } if (changed) { - struct ib_event event; - (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); - - event.event = IB_EVENT_PKEY_CHANGE; - event.device = &dd->verbs_dev.rdi.ibdev; - event.element.port_num = port; - ib_dispatch_event(&event); + hfi1_event_pkey_change(dd, port); } + return 0; } diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 55ee08675333..8b734aaae88a 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -434,4 +434,6 @@ struct sc2vlnt { COUNTER_MASK(1, 3) | \ COUNTER_MASK(1, 4)) +void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port); + #endif /* _HFI1_MAD_H */ diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index d5edb1afbb8f..d4022450b73f 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -995,7 +995,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause) /* counter is reset if occupancy count changes */ if (reg != reg_prev) loop = 0; - if (loop > 500) { + if (loop > 50000) { /* timed out - bounce the link */ dd_dev_err(dd, "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", @@ -1797,6 +1797,21 @@ static void pio_map_rcu_callback(struct rcu_head *list) pio_map_free(m); } +/* + * Set credit return threshold for the kernel send context + */ +static void set_threshold(struct hfi1_devdata *dd, int scontext, int i) +{ + u32 thres; + + thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext], + 50), + sc_mtu_to_threshold(dd->kernel_send_context[scontext], + 
dd->vld[i].mtu, + dd->rcd[0]->rcvhdrqentsize)); + sc_set_cr_threshold(dd->kernel_send_context[scontext], thres); +} + /* * pio_map_init - called when #vls change * @dd: hfi1_devdata @@ -1872,11 +1887,16 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) if (!newmap->map[i]) goto bail; newmap->map[i]->mask = (1 << ilog2(sz)) - 1; - /* assign send contexts */ + /* + * assign send contexts and + * adjust credit return threshold + */ for (j = 0; j < sz; j++) { - if (dd->kernel_send_context[scontext]) + if (dd->kernel_send_context[scontext]) { newmap->map[i]->ksc[j] = dd->kernel_send_context[scontext]; + set_threshold(dd, scontext, i); + } if (++scontext >= first_scontext + vl_scontexts[i]) /* wrap back to first send context */ diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 2441669f0817..9fb561682c66 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -579,7 +579,8 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len) if (ppd->qsfp_info.cache_valid) { if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS])) - sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]); + snprintf(lenstr, sizeof(lenstr), "%dM ", + cache[QSFP_MOD_LEN_OFFS]); power_byte = cache[QSFP_MOD_PWR_OFFS]; sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n", diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 1e503ad0bebb..be91f6fa1c87 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -678,8 +678,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; bool has_grh = rcv_flags & HFI1_HAS_GRH; - bool sc4_bit = has_sc4_bit(packet); - u8 sc; + u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); u32 bth1; int is_mcast; struct ib_grh *grh = NULL; @@ -697,10 +696,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) */ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - u8 sl, sc5; + u8 sl; - sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; - sc5 |= sc4_bit; sl = ibp->sc_to_sl[sc5]; process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD); @@ -717,10 +714,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) { u16 slid = be16_to_cpu(hdr->lrh[3]); - u8 sc5; - - sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; - sc5 |= sc4_bit; return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh); } @@ -745,10 +738,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (qp->ibqp.qp_num > 1) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u16 slid; - u8 sc5; - - sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; - sc5 |= sc4_bit; slid = be16_to_cpu(hdr->lrh[3]); if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) { @@ -790,10 +779,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) /* Received on QP0, and so by definition, this is an SMP */ struct opa_smp *smp = (struct opa_smp *)data; u16 slid = be16_to_cpu(hdr->lrh[3]); - u8 sc5; - - sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; - sc5 |= sc4_bit; if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp)) goto drop; @@ -890,9 +875,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } wc.slid = be16_to_cpu(hdr->lrh[3]); - sc = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; - sc |= sc4_bit; - wc.sl = ibp->sc_to_sl[sc]; + wc.sl = ibp->sc_to_sl[sc5]; /* * Save the LMC lower bits if the destination LID is a unicast LID. 
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index bc95c4112c61..d8fb056526f8 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -92,11 +92,10 @@ void hfi1_put_txreq(struct verbs_txreq *tx) struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp) + __must_hold(&qp->s_lock) { struct verbs_txreq *tx = ERR_PTR(-EBUSY); - unsigned long flags; - spin_lock_irqsave(&qp->s_lock, flags); write_seqlock(&dev->iowait_lock); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { struct hfi1_qp_priv *priv; @@ -116,7 +115,6 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, } out: write_sequnlock(&dev->iowait_lock); - spin_unlock_irqrestore(&qp->s_lock, flags); return tx; } diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 1cf69b2fe4a5..a1d6e0807f97 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -73,6 +73,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp) + __must_hold(&qp->s_lock) { struct verbs_txreq *tx; struct hfi1_qp_priv *priv = qp->priv; diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 8b9532034558..b738acdb9b02 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -113,6 +113,8 @@ #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types) #define IW_CFG_FPM_QP_COUNT 32768 +#define I40IW_MAX_PAGES_PER_FMR 512 +#define I40IW_MIN_PAGES_PER_FMR 1 #define I40IW_MTU_TO_MSS 40 #define I40IW_DEFAULT_MSS 1460 diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index c963cad92f5a..6e9081380a27 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -600,8 +600,7 @@ static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev) cqp_init_info.scratch_array = cqp->scratch_array; status = dev->cqp_ops->cqp_init(dev->cqp, &cqp_init_info); if (status) { - i40iw_pr_err("cqp init status %d maj_err %d min_err %d\n", - status, maj_err, min_err); + i40iw_pr_err("cqp init status %d\n", status); goto exit; } status = dev->cqp_ops->cqp_create(dev->cqp, true, &maj_err, &min_err); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 02a735b64208..283b64c942ee 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -79,6 +79,7 @@ static int i40iw_query_device(struct ib_device *ibdev, props->max_qp_init_rd_atom = props->max_qp_rd_atom; props->atomic_cap = IB_ATOMIC_NONE; props->max_map_per_fmr = 1; + props->max_fast_reg_page_list_len = I40IW_MAX_PAGES_PER_FMR; return 0; } @@ -1473,6 +1474,7 @@ static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT; info->pd_id = iwpd->sc_pd.pd_id; info->total_len = iwmr->length; + info->remote_access = true; cqp_info->cqp_cmd = OP_ALLOC_STAG; cqp_info->post_sq = 1; cqp_info->in.u.alloc_stag.dev = &iwdev->sc_dev; @@ -1527,7 +1529,7 @@ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, mutex_lock(&iwdev->pbl_mutex); status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt); mutex_unlock(&iwdev->pbl_mutex); - if (!status) + if (status) goto err1; if (palloc->level != 
I40IW_LEVEL_1) @@ -2149,6 +2151,7 @@ static int i40iw_post_send(struct ib_qp *ibqp, struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev; struct i40iw_fast_reg_stag_info info; + memset(&info, 0, sizeof(info)); info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD; info.access_rights |= i40iw_get_user_access(flags); info.stag_key = reg_wr(ib_wr)->key & 0xff; @@ -2158,10 +2161,14 @@ static int i40iw_post_send(struct ib_qp *ibqp, info.addr_type = I40IW_ADDR_TYPE_VA_BASED; info.va = (void *)(uintptr_t)iwmr->ibmr.iova; info.total_len = iwmr->ibmr.length; + info.reg_addr_pa = *(u64 *)palloc->level1.addr; info.first_pm_pbl_index = palloc->level1.idx; info.local_fence = ib_wr->send_flags & IB_SEND_FENCE; info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED; + if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR) + info.chunk_size = 1; + if (page_shift == 21) info.page_size = 1; /* 2M page */ @@ -2327,13 +2334,16 @@ static int i40iw_req_notify_cq(struct ib_cq *ibcq, { struct i40iw_cq *iwcq; struct i40iw_cq_uk *ukcq; - enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_SOLICITED; + unsigned long flags; + enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_EVENT; iwcq = (struct i40iw_cq *)ibcq; ukcq = &iwcq->sc_cq.cq_uk; - if (notify_flags == IB_CQ_NEXT_COMP) - cq_notify = IW_CQ_COMPL_EVENT; + if (notify_flags == IB_CQ_SOLICITED) + cq_notify = IW_CQ_COMPL_SOLICITED; + spin_lock_irqsave(&iwcq->lock, flags); ukcq->ops.iw_cq_request_notification(ukcq, cq_notify); + spin_unlock_irqrestore(&iwcq->lock, flags); return 0; } diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 105246fba2e7..5fc623362731 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -47,6 +47,7 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); ah->av.ib.g_slid = ah_attr->src_path_bits; + ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); if (ah_attr->ah_flags & IB_AH_GRH) { ah->av.ib.g_slid |= 0x80; ah->av.ib.gid_index = ah_attr->grh.sgid_index; @@ -64,7 +65,6 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) --ah->av.ib.stat_rate; } - ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); return &ah->ibah; } diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index d68f506c1922..9c2e53d28f98 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -527,7 +527,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1); spin_unlock(&tun_qp->tx_lock); if (ret) - goto out; + goto end; tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr); if (tun_qp->tx_ring[tun_tx_ix].ah) @@ -596,9 +596,15 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, wr.wr.send_flags = IB_SEND_SIGNALED; ret = ib_post_send(src_qp, &wr.wr, &bad_wr); -out: - if (ret) - ib_destroy_ah(ah); + if (!ret) + return 0; + out: + spin_lock(&tun_qp->tx_lock); + tun_qp->tx_ix_tail++; + spin_unlock(&tun_qp->tx_lock); + tun_qp->tx_ring[tun_tx_ix].ah = NULL; +end: + ib_destroy_ah(ah); return ret; } @@ -1326,9 +1332,15 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, ret = ib_post_send(send_qp, &wr.wr, &bad_wr); + if (!ret) + return 0; + + spin_lock(&sqp->tx_lock); + sqp->tx_ix_tail++; + 
spin_unlock(&sqp->tx_lock); + sqp->tx_ring[wire_tx_ix].ah = NULL; out: - if (ret) - ib_destroy_ah(ah); + ib_destroy_ah(ah); return ret; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0eb09e104542..42a46078d7d5 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1704,6 +1704,9 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct mlx4_dev *dev = (to_mdev(qp->device))->dev; int is_bonded = mlx4_is_bonded(dev); + if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt) + return ERR_PTR(-EINVAL); + if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && (flow_attr->type != IB_FLOW_ATTR_NORMAL)) return ERR_PTR(-EOPNOTSUPP); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 6c5ac5d8f32f..29acda249612 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -139,7 +139,7 @@ struct mlx4_ib_mr { u32 max_pages; struct mlx4_mr mmr; struct ib_umem *umem; - void *pages_alloc; + size_t page_map_size; }; struct mlx4_ib_mw { diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 631272172a0b..5d73989d9771 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -277,20 +277,23 @@ mlx4_alloc_priv_pages(struct ib_device *device, struct mlx4_ib_mr *mr, int max_pages) { - int size = max_pages * sizeof(u64); - int add_size; int ret; - add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0); + /* Ensure that size is aligned to DMA cacheline + * requirements. + * max_pages is limited to MLX4_MAX_FAST_REG_PAGES + * so page_map_size will never cross PAGE_SIZE. + */ + mr->page_map_size = roundup(max_pages * sizeof(u64), + MLX4_MR_PAGES_ALIGN); - mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL); - if (!mr->pages_alloc) + /* Prevent cross page boundary allocation. 
*/ + mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL); + if (!mr->pages) return -ENOMEM; - mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN); - mr->page_map = dma_map_single(device->dma_device, mr->pages, - size, DMA_TO_DEVICE); + mr->page_map_size, DMA_TO_DEVICE); if (dma_mapping_error(device->dma_device, mr->page_map)) { ret = -ENOMEM; @@ -298,9 +301,9 @@ mlx4_alloc_priv_pages(struct ib_device *device, } return 0; -err: - kfree(mr->pages_alloc); +err: + free_page((unsigned long)mr->pages); return ret; } @@ -309,11 +312,10 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr) { if (mr->pages) { struct ib_device *device = mr->ibmr.device; - int size = mr->max_pages * sizeof(u64); dma_unmap_single(device->dma_device, mr->page_map, - size, DMA_TO_DEVICE); - kfree(mr->pages_alloc); + mr->page_map_size, DMA_TO_DEVICE); + free_page((unsigned long)mr->pages); mr->pages = NULL; } } @@ -537,14 +539,12 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, mr->npages = 0; ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map, - sizeof(u64) * mr->max_pages, - DMA_TO_DEVICE); + mr->page_map_size, DMA_TO_DEVICE); rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page); ib_dma_sync_single_for_device(ibmr->device, mr->page_map, - sizeof(u64) * mr->max_pages, - DMA_TO_DEVICE); + mr->page_map_size, DMA_TO_DEVICE); return rc; } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 81b0e1fbec1d..768085f59566 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -232,7 +232,7 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) } } else { ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); - s = (ctrl->fence_size & 0x3f) << 4; + s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4; for (i = 64; i < s; i += 64) { wqe = buf + i; *wqe = cpu_to_be32(0xffffffff); @@ -264,7 +264,7 @@ static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size) inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl)); } ctrl->srcrb_flags = 0; - ctrl->fence_size = size / 16; + ctrl->qpn_vlan.fence_size = size / 16; /* * Make sure descriptor is fully written before setting ownership bit * (because HW can start executing as soon as we do). @@ -362,7 +362,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) sizeof (struct mlx4_wqe_raddr_seg); case MLX4_IB_QPT_RC: return sizeof (struct mlx4_wqe_ctrl_seg) + - sizeof (struct mlx4_wqe_atomic_seg) + + sizeof (struct mlx4_wqe_masked_atomic_seg) + sizeof (struct mlx4_wqe_raddr_seg); case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: @@ -1191,8 +1191,10 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, { err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, &qp, gfp); - if (err) + if (err) { + kfree(qp); return ERR_PTR(err); + } qp->ibqp.qp_num = qp->mqp.qpn; qp->xrcdn = xrcdn; @@ -1990,7 +1992,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1 << 31); if (qp->sq_max_wqes_per_wr == 1) - ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4); + ctrl->qpn_vlan.fence_size = + 1 << (qp->sq.wqe_shift - 4); stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); } @@ -3167,8 +3170,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, wmb(); *lso_wqe = lso_hdr_sz; - ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? - MLX4_WQE_CTRL_FENCE : 0) | size; + ctrl->qpn_vlan.fence_size = (wr->send_flags & IB_SEND_FENCE ? 
+ MLX4_WQE_CTRL_FENCE : 0) | size; /* * Make sure descriptor is fully written before diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 1534af113058..364aab9f3c9e 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -121,7 +121,7 @@ static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext, pma_cnt_ext->port_xmit_data = cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets, transmitted_ib_multicast.octets) >> 2); - pma_cnt_ext->port_xmit_data = + pma_cnt_ext->port_rcv_data = cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets, received_ib_multicast.octets) >> 2); pma_cnt_ext->port_xmit_packets = diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b48ad85315dc..dad63f038bb8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1528,21 +1528,18 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_spec *spec; void *ib_flow = flow_attr + 1; - u8 match_criteria_enable = 0; unsigned int spec_index; - u32 *match_c; - u32 *match_v; u32 action; int err = 0; if (!is_valid_attr(flow_attr)) return ERR_PTR(-EINVAL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + spec = mlx5_vzalloc(sizeof(*spec)); handler = kzalloc(sizeof(*handler), GFP_KERNEL); - if (!handler || !match_c || !match_v) { + if (!handler || !spec) { err = -ENOMEM; goto free; } @@ -1550,7 +1547,8 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, INIT_LIST_HEAD(&handler->list); for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - err = parse_flow_attr(match_c, match_v, ib_flow); + err = parse_flow_attr(spec->match_criteria, + spec->match_value, ib_flow); if (err < 0) goto free; @@ -1558,11 +1556,11 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, } /* Outer header support only */ - match_criteria_enable = (!outer_header_zero(match_c)) << 0; + spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)) + << 0; action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; - handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, - match_c, match_v, + handler->rule = mlx5_add_flow_rule(ft, spec, action, MLX5_FS_DEFAULT_FLOW_TAG, dst); @@ -1578,8 +1576,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, free: if (err) kfree(handler); - kfree(match_c); - kfree(match_v); + kvfree(spec); return err ? 
ERR_PTR(err) : handler; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ce434228a5ea..ce0a7ab35a22 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3332,10 +3332,11 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr) return MLX5_FENCE_MODE_SMALL_AND_FENCE; else return fence; - - } else { - return 0; + } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) { + return MLX5_FENCE_MODE_FENCE; } + + return 0; } static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index ff946d5f59e4..382466a90da7 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -2178,6 +2178,11 @@ static ssize_t qib_write(struct file *fp, const char __user *data, switch (cmd.type) { case QIB_CMD_ASSIGN_CTXT: + if (rcd) { + ret = -EINVAL; + goto bail; + } + ret = qib_assign_ctxt(fp, &cmd.cmd.user_info); if (ret) goto bail; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 7de5134bec85..41ba7e9cadaa 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -369,8 +369,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, /* wrap to first map page, invert bit 0 */ offset = qpt->incr | ((offset & 1) ^ 1); } - /* there can be no bits at shift and below */ - WARN_ON(offset & (rdi->dparms.qos_shift - 1)); + /* there can be no set bits in low-order QoS bits */ + WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1)); qpn = mk_qpn(qpt, map, offset); } @@ -576,12 +576,6 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->s_ssn = 1; qp->s_lsn = 0; qp->s_mig_state = IB_MIG_MIGRATED; - if (qp->s_ack_queue) - memset( - qp->s_ack_queue, - 0, - rvt_max_atomic(rdi) * - sizeof(*qp->s_ack_queue)); qp->r_head_ack_queue = 0; qp->s_tail_ack_queue = 0; qp->s_num_rd_atomic = 0; @@ -705,8 +699,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * initialization that is needed. 
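The alloc_qpn() hunk above corrects a low-bit mask: the bottom qos_shift bits are selected by BIT(qos_shift) - 1, not by (qos_shift - 1). A self-contained demonstration of the difference (values are arbitrary):

#include <stdio.h>

#define BIT(n) (1UL << (n))

int main(void)
{
        unsigned long offset = 0x7, shift = 3;

        /* wrong: (shift - 1) == 2, which tests only bit 1 */
        printf("offset & (shift - 1)      = %#lx\n", offset & (shift - 1));
        /* right: BIT(shift) - 1 == 0x7, which tests bits 0..2 */
        printf("offset & (BIT(shift) - 1) = %#lx\n", offset & (BIT(shift) - 1));
        return 0;
}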
*/ priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); - if (!priv) + if (IS_ERR(priv)) { + ret = priv; goto bail_qp; + } qp->priv = priv; qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index e1cc2cc42f25..30c4fda7a05a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -501,9 +501,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) !rdi->driver_f.quiesce_qp || !rdi->driver_f.notify_error_qp || !rdi->driver_f.mtu_from_qp || - !rdi->driver_f.mtu_to_path_mtu || - !rdi->driver_f.shut_down_port || - !rdi->driver_f.cap_mask_chg) + !rdi->driver_f.mtu_to_path_mtu) return -EINVAL; break; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index e68b20cba70b..4a4155640d51 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1638,8 +1638,7 @@ retry: */ qp_init->cap.max_send_wr = srp_sq_size / 2; qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; - qp_init->cap.max_send_sge = max(sdev->device->attrs.max_sge_rd, - sdev->device->attrs.max_sge); + qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE; qp_init->port_num = ch->sport->port; ch->qp = ib_create_qp(sdev->pd, qp_init); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index fee6bfd7ca21..389030487da7 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -106,6 +106,7 @@ enum { SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2, SRPT_DEF_SG_TABLESIZE = 128, + SRPT_DEF_SG_PER_WQE = 16, MIN_SRPT_SQ_SIZE = 16, DEF_SRPT_SQ_SIZE = 4096, diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 804dbcc37d3f..a529a4535457 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -1031,17 +1031,17 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect case XTYPE_XBOXONE: packet->data[0] = 0x09; /* activate rumble */ - packet->data[1] = 0x08; + packet->data[1] = 0x00; packet->data[2] = xpad->odata_serial++; - packet->data[3] = 0x08; /* continuous effect */ - packet->data[4] = 0x00; /* simple rumble mode */ - packet->data[5] = 0x03; /* L and R actuator only */ - packet->data[6] = 0x00; /* TODO: LT actuator */ - packet->data[7] = 0x00; /* TODO: RT actuator */ + packet->data[3] = 0x09; + packet->data[4] = 0x00; + packet->data[5] = 0x0F; + packet->data[6] = 0x00; + packet->data[7] = 0x00; packet->data[8] = strong / 512; /* left actuator */ packet->data[9] = weak / 512; /* right actuator */ - packet->data[10] = 0x80; /* length of pulse */ - packet->data[11] = 0x00; /* stop period of pulse */ + packet->data[10] = 0xFF; + packet->data[11] = 0x00; packet->data[12] = 0x00; packet->len = 13; packet->pending = true; @@ -1431,22 +1431,15 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id int ep_irq_in_idx; int i, error; + if (intf->cur_altsetting->desc.bNumEndpoints != 2) + return -ENODEV; + for (i = 0; xpad_device[i].idVendor; i++) { if ((le16_to_cpu(udev->descriptor.idVendor) == xpad_device[i].idVendor) && (le16_to_cpu(udev->descriptor.idProduct) == xpad_device[i].idProduct)) break; } - if (xpad_device[i].xtype == XTYPE_XBOXONE && - intf->cur_altsetting->desc.bInterfaceNumber != 0) { - /* - * The Xbox One controller lists three interfaces all with the - * same interface class, subclass and protocol. Differentiate by - * interface number. 
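The rvt_create_qp() fix above matters because qp_priv_alloc() reports failure through an encoded error pointer, so a NULL test can never fire. A minimal sketch of the ERR_PTR()/IS_ERR()/PTR_ERR() convention (the demo_* names are hypothetical):

#include <linux/err.h>
#include <linux/slab.h>

static void *demo_priv_alloc(gfp_t gfp)
{
        void *p = kzalloc(64, gfp);

        /* failure travels back as an encoded errno, never as NULL */
        return p ? p : ERR_PTR(-ENOMEM);
}

static int demo_create(void)
{
        void *priv = demo_priv_alloc(GFP_KERNEL);

        if (IS_ERR(priv))
                return PTR_ERR(priv);   /* recover the errno, e.g. -ENOMEM */

        kfree(priv);
        return 0;
}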
- */ - return -ENODEV; - } - xpad = kzalloc(sizeof(struct usb_xpad), GFP_KERNEL); if (!xpad) return -ENOMEM; @@ -1478,6 +1471,8 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) { if (intf->cur_altsetting->desc.bInterfaceProtocol == 129) xpad->xtype = XTYPE_XBOX360W; + else if (intf->cur_altsetting->desc.bInterfaceProtocol == 208) + xpad->xtype = XTYPE_XBOXONE; else xpad->xtype = XTYPE_XBOX360; } else { @@ -1492,6 +1487,17 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id xpad->mapping |= MAP_STICKS_TO_NULL; } + if (xpad->xtype == XTYPE_XBOXONE && + intf->cur_altsetting->desc.bInterfaceNumber != 0) { + /* + * The Xbox One controller lists three interfaces all with the + * same interface class, subclass and protocol. Differentiate by + * interface number. + */ + error = -ENODEV; + goto err_free_in_urb; + } + error = xpad_init_output(intf, xpad); if (error) goto err_free_in_urb; diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 78f93cf68840..be5b399da5d3 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1568,13 +1568,7 @@ static int elantech_set_properties(struct elantech_data *etd) case 5: etd->hw_version = 3; break; - case 6: - case 7: - case 8: - case 9: - case 10: - case 13: - case 14: + case 6 ... 14: etd->hw_version = 4; break; default: diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c index a3f0f5a47490..0f586780ceb4 100644 --- a/drivers/input/mouse/vmmouse.c +++ b/drivers/input/mouse/vmmouse.c @@ -355,18 +355,11 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties) return -ENXIO; } - if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) { - psmouse_dbg(psmouse, "VMMouse port in use.\n"); - return -EBUSY; - } - /* Check if the device is present */ response = ~VMMOUSE_PROTO_MAGIC; VMMOUSE_CMD(GETVERSION, 0, version, response, dummy1, dummy2); - if (response != VMMOUSE_PROTO_MAGIC || version == 0xffffffffU) { - release_region(VMMOUSE_PROTO_PORT, 4); + if (response != VMMOUSE_PROTO_MAGIC || version == 0xffffffffU) return -ENXIO; - } if (set_properties) { psmouse->vendor = VMMOUSE_VENDOR; @@ -374,8 +367,6 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties) psmouse->model = version; } - release_region(VMMOUSE_PROTO_PORT, 4); - return 0; } @@ -394,7 +385,6 @@ static void vmmouse_disconnect(struct psmouse *psmouse) psmouse_reset(psmouse); input_unregister_device(priv->abs_dev); kfree(priv); - release_region(VMMOUSE_PROTO_PORT, 4); } /** @@ -438,15 +428,10 @@ int vmmouse_init(struct psmouse *psmouse) struct input_dev *rel_dev = psmouse->dev, *abs_dev; int error; - if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) { - psmouse_dbg(psmouse, "VMMouse port in use.\n"); - return -EBUSY; - } - psmouse_reset(psmouse); error = vmmouse_enable(psmouse); if (error) - goto release_region; + return error; priv = kzalloc(sizeof(*priv), GFP_KERNEL); abs_dev = input_allocate_device(); @@ -502,8 +487,5 @@ init_fail: kfree(priv); psmouse->private = NULL; -release_region: - release_region(VMMOUSE_PROTO_PORT, 4); - return error; } diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c index b368b0515c5a..253df96be427 100644 --- a/drivers/input/rmi4/rmi_bus.c +++ b/drivers/input/rmi4/rmi_bus.c @@ -157,11 +157,11 @@ static int rmi_function_match(struct device *dev, struct device_driver *drv) static void rmi_function_of_probe(struct 
rmi_function *fn) { char of_name[9]; + struct device_node *node = fn->rmi_dev->xport->dev->of_node; snprintf(of_name, sizeof(of_name), "rmi4-f%02x", fn->fd.function_number); - fn->dev.of_node = of_find_node_by_name( - fn->rmi_dev->xport->dev->of_node, of_name); + fn->dev.of_node = of_get_child_by_name(node, of_name); } #else static inline void rmi_function_of_probe(struct rmi_function *fn) diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c index 8dd3fb5e1f94..88e91559c84e 100644 --- a/drivers/input/rmi4/rmi_f12.c +++ b/drivers/input/rmi4/rmi_f12.c @@ -66,7 +66,7 @@ static int rmi_f12_read_sensor_tuning(struct f12_data *f12) struct rmi_device *rmi_dev = fn->rmi_dev; int ret; int offset; - u8 buf[14]; + u8 buf[15]; int pitch_x = 0; int pitch_y = 0; int clip_x_low = 0; @@ -86,9 +86,10 @@ static int rmi_f12_read_sensor_tuning(struct f12_data *f12) offset = rmi_register_desc_calc_reg_offset(&f12->control_reg_desc, 8); - if (item->reg_size > 14) { - dev_err(&fn->dev, "F12 control8 should be 14 bytes, not: %ld\n", - item->reg_size); + if (item->reg_size > sizeof(buf)) { + dev_err(&fn->dev, + "F12 control8 should be no bigger than %zd bytes, not: %ld\n", + sizeof(buf), item->reg_size); return -ENODEV; } diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c index 880c40b23f66..4ea475775d58 100644 --- a/drivers/input/touchscreen/sur40.c +++ b/drivers/input/touchscreen/sur40.c @@ -126,7 +126,7 @@ struct sur40_image_header { #define VIDEO_PACKET_SIZE 16384 /* polling interval (ms) */ -#define POLL_INTERVAL 4 +#define POLL_INTERVAL 1 /* maximum number of contacts FIXME: this is a guess? */ #define MAX_CONTACTS 64 @@ -151,7 +151,6 @@ struct sur40_state { struct mutex lock; struct vb2_queue queue; - struct vb2_alloc_ctx *alloc_ctx; struct list_head buf_list; spinlock_t qlock; int sequence; @@ -448,7 +447,7 @@ static void sur40_process_video(struct sur40_state *sur40) /* return error if streaming was stopped in the meantime */ if (sur40->sequence == -1) - goto err_poll; + return; /* mark as finished */ new_buf->vb.vb2_buf.timestamp = ktime_get_ns(); @@ -580,19 +579,13 @@ static int sur40_probe(struct usb_interface *interface, sur40->queue = sur40_queue; sur40->queue.drv_priv = sur40; sur40->queue.lock = &sur40->lock; + sur40->queue.dev = sur40->dev; /* initialize the queue */ error = vb2_queue_init(&sur40->queue); if (error) goto err_unreg_v4l2; - sur40->alloc_ctx = vb2_dma_sg_init_ctx(sur40->dev); - if (IS_ERR(sur40->alloc_ctx)) { - dev_err(sur40->dev, "Can't allocate buffer context"); - error = PTR_ERR(sur40->alloc_ctx); - goto err_unreg_v4l2; - } - sur40->vdev = sur40_video_device; sur40->vdev.v4l2_dev = &sur40->v4l2; sur40->vdev.lock = &sur40->lock; @@ -633,7 +626,6 @@ static void sur40_disconnect(struct usb_interface *interface) video_unregister_device(&sur40->vdev); v4l2_device_unregister(&sur40->v4l2); - vb2_dma_sg_cleanup_ctx(sur40->alloc_ctx); input_unregister_polled_device(sur40->input); input_free_polled_device(sur40->input); @@ -653,13 +645,10 @@ static void sur40_disconnect(struct usb_interface *interface) */ static int sur40_queue_setup(struct vb2_queue *q, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { - struct sur40_state *sur40 = vb2_get_drv_priv(q); - if (q->num_buffers + *nbuffers < 3) *nbuffers = 3 - q->num_buffers; - alloc_ctxs[0] = sur40->alloc_ctx; if (*nplanes) return sizes[0] < sur40_video_format.sizeimage ? 
-EINVAL : 0; @@ -736,6 +725,7 @@ static int sur40_start_streaming(struct vb2_queue *vq, unsigned int count) static void sur40_stop_streaming(struct vb2_queue *vq) { struct sur40_state *sur40 = vb2_get_drv_priv(vq); + vb2_wait_for_all_buffers(vq); sur40->sequence = -1; /* Release all active buffers */ @@ -793,7 +783,6 @@ static int sur40_vidioc_enum_fmt(struct file *file, void *priv, { if (f->index != 0) return -EINVAL; - strlcpy(f->description, "8-bit greyscale", sizeof(f->description)); f->pixelformat = V4L2_PIX_FMT_GREY; f->flags = 0; return 0; diff --git a/drivers/input/touchscreen/ts4800-ts.c b/drivers/input/touchscreen/ts4800-ts.c index 3c3dd78303be..fed73eeb47b3 100644 --- a/drivers/input/touchscreen/ts4800-ts.c +++ b/drivers/input/touchscreen/ts4800-ts.c @@ -118,6 +118,13 @@ static int ts4800_parse_dt(struct platform_device *pdev, return -ENODEV; } + ts->regmap = syscon_node_to_regmap(syscon_np); + of_node_put(syscon_np); + if (IS_ERR(ts->regmap)) { + dev_err(dev, "cannot get parent's regmap\n"); + return PTR_ERR(ts->regmap); + } + error = of_property_read_u32_index(np, "syscon", 1, ®); if (error < 0) { dev_err(dev, "no offset in syscon\n"); @@ -134,12 +141,6 @@ static int ts4800_parse_dt(struct platform_device *pdev, ts->bit = BIT(bit); - ts->regmap = syscon_node_to_regmap(syscon_np); - if (IS_ERR(ts->regmap)) { - dev_err(dev, "cannot get parent's regmap\n"); - return PTR_ERR(ts->regmap); - } - return 0; } diff --git a/drivers/input/touchscreen/tsc2004.c b/drivers/input/touchscreen/tsc2004.c index 7295c198aa08..6fe55d598fac 100644 --- a/drivers/input/touchscreen/tsc2004.c +++ b/drivers/input/touchscreen/tsc2004.c @@ -22,6 +22,11 @@ #include #include "tsc200x-core.h" +static const struct input_id tsc2004_input_id = { + .bustype = BUS_I2C, + .product = 2004, +}; + static int tsc2004_cmd(struct device *dev, u8 cmd) { u8 tx = TSC200X_CMD | TSC200X_CMD_12BIT | cmd; @@ -42,7 +47,7 @@ static int tsc2004_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { - return tsc200x_probe(&i2c->dev, i2c->irq, BUS_I2C, + return tsc200x_probe(&i2c->dev, i2c->irq, &tsc2004_input_id, devm_regmap_init_i2c(i2c, &tsc200x_regmap_config), tsc2004_cmd); } diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c index b9f593dfd2ef..f2c5f0e47f77 100644 --- a/drivers/input/touchscreen/tsc2005.c +++ b/drivers/input/touchscreen/tsc2005.c @@ -24,6 +24,11 @@ #include #include "tsc200x-core.h" +static const struct input_id tsc2005_input_id = { + .bustype = BUS_SPI, + .product = 2005, +}; + static int tsc2005_cmd(struct device *dev, u8 cmd) { u8 tx = TSC200X_CMD | TSC200X_CMD_12BIT | cmd; @@ -62,7 +67,7 @@ static int tsc2005_probe(struct spi_device *spi) if (error) return error; - return tsc200x_probe(&spi->dev, spi->irq, BUS_SPI, + return tsc200x_probe(&spi->dev, spi->irq, &tsc2005_input_id, devm_regmap_init_spi(spi, &tsc200x_regmap_config), tsc2005_cmd); } diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c index 15240c1ee850..dfa7f1c4f545 100644 --- a/drivers/input/touchscreen/tsc200x-core.c +++ b/drivers/input/touchscreen/tsc200x-core.c @@ -450,7 +450,7 @@ static void tsc200x_close(struct input_dev *input) mutex_unlock(&ts->mutex); } -int tsc200x_probe(struct device *dev, int irq, __u16 bustype, +int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id, struct regmap *regmap, int (*tsc200x_cmd)(struct device *dev, u8 cmd)) { @@ -547,9 +547,18 @@ int tsc200x_probe(struct device *dev, int irq, __u16 bustype, 
snprintf(ts->phys, sizeof(ts->phys), "%s/input-ts", dev_name(dev)); - input_dev->name = "TSC200X touchscreen"; + if (tsc_id->product == 2004) { + input_dev->name = "TSC200X touchscreen"; + } else { + input_dev->name = devm_kasprintf(dev, GFP_KERNEL, + "TSC%04d touchscreen", + tsc_id->product); + if (!input_dev->name) + return -ENOMEM; + } + input_dev->phys = ts->phys; - input_dev->id.bustype = bustype; + input_dev->id = *tsc_id; input_dev->dev.parent = dev; input_dev->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY); input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); diff --git a/drivers/input/touchscreen/tsc200x-core.h b/drivers/input/touchscreen/tsc200x-core.h index 7a482d102614..49a63a3c6840 100644 --- a/drivers/input/touchscreen/tsc200x-core.h +++ b/drivers/input/touchscreen/tsc200x-core.h @@ -70,7 +70,7 @@ extern const struct regmap_config tsc200x_regmap_config; extern const struct dev_pm_ops tsc200x_pm_ops; -int tsc200x_probe(struct device *dev, int irq, __u16 bustype, +int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id, struct regmap *regmap, int (*tsc200x_cmd)(struct device *dev, u8 cmd)); int tsc200x_remove(struct device *dev); diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c index bab3c6acf6a2..b6fc4bde79de 100644 --- a/drivers/input/touchscreen/wacom_w8001.c +++ b/drivers/input/touchscreen/wacom_w8001.c @@ -27,7 +27,7 @@ MODULE_AUTHOR("Jaya Kumar "); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); -#define W8001_MAX_LENGTH 11 +#define W8001_MAX_LENGTH 13 #define W8001_LEAD_MASK 0x80 #define W8001_LEAD_BYTE 0x80 #define W8001_TAB_MASK 0x40 @@ -155,6 +155,7 @@ static void parse_multi_touch(struct w8001 *w8001) bool touch = data[0] & (1 << i); input_mt_slot(dev, i); + input_mt_report_slot_state(dev, MT_TOOL_FINGER, touch); if (touch) { x = (data[6 * i + 1] << 7) | data[6 * i + 2]; y = (data[6 * i + 3] << 7) | data[6 * i + 4]; @@ -339,6 +340,15 @@ static irqreturn_t w8001_interrupt(struct serio *serio, w8001->idx = 0; parse_multi_touch(w8001); break; + + default: + /* + * ThinkPad X60 Tablet PC (pen only device) sometimes + * sends invalid data packets that are larger than + * W8001_PKTLEN_TPCPEN. Let's start over again. + */ + if (!w8001->touch_dev && w8001->idx > W8001_PKTLEN_TPCPEN - 1) + w8001->idx = 0; } return IRQ_HANDLED; @@ -513,6 +523,8 @@ static int w8001_setup_touch(struct w8001 *w8001, char *basename, 0, touch.x, 0, 0); input_set_abs_params(dev, ABS_MT_POSITION_Y, 0, touch.y, 0, 0); + input_set_abs_params(dev, ABS_MT_TOOL_TYPE, + 0, MT_TOOL_MAX, 0, 0); strlcat(basename, " 2FG", basename_sz); if (w8001->max_pen_x && w8001->max_pen_y) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 9e0034196e10..59741ead7e15 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1107,13 +1107,13 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } + devid = e->devid; DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n", hid, uid, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); - devid = e->devid; flags = e->flags; ret = add_acpi_hid_device(hid, uid, &devid, false); @@ -1568,13 +1568,23 @@ static int __init amd_iommu_init_pci(void) break; } + /* + * Order is important here to make sure any unity map requirements are + * fulfilled. The unity mappings are created and written to the device + * table during the amd_iommu_init_api() call. 
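The tsc200x_probe() change above formats the device name with devm_kasprintf(), so the string's lifetime is tied to the struct device and no error path has to free it by hand. A short sketch of the pattern (the product value is illustrative):

#include <linux/device.h>
#include <linux/input.h>
#include <linux/slab.h>

static int demo_set_name(struct device *dev, struct input_dev *input_dev,
                         int product)
{
        /* devm_ allocation: released automatically when 'dev' goes away */
        input_dev->name = devm_kasprintf(dev, GFP_KERNEL,
                                         "TSC%04d touchscreen", product);
        if (!input_dev->name)
                return -ENOMEM;

        return 0;
}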
+ * + * After that we call init_device_table_dma() to make sure any + * uninitialized DTE will block DMA, and in the end we flush the caches + * of all IOMMUs to make sure the changes to the device table are + * active. + */ + ret = amd_iommu_init_api(); + init_device_table_dma(); for_each_iommu(iommu) iommu_flush_all_caches(iommu); - ret = amd_iommu_init_api(); - if (!ret) print_iommu_info(); diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 56999d2fac07..fbdaf81ae925 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -538,8 +538,7 @@ static void do_fault(struct work_struct *work) if (access_error(vma, fault)) goto out; - ret = handle_mm_fault(mm, vma, address, flags); - + ret = handle_mm_fault(vma, address, flags); out: up_read(&mm->mmap_sem); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 10700945994e..323dac9900ba 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4602,13 +4602,13 @@ static void free_all_cpu_cached_iovas(unsigned int cpu) for (i = 0; i < g_num_of_iommus; i++) { struct intel_iommu *iommu = g_iommus[i]; struct dmar_domain *domain; - u16 did; + int did; if (!iommu) continue; - for (did = 0; did < 0xffff; did++) { - domain = get_iommu_domain(iommu, did); + for (did = 0; did < cap_ndoms(iommu->cap); did++) { + domain = get_iommu_domain(iommu, (u16)did); if (!domain) continue; diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index d9939fa9b588..8ebb3530afa7 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -583,7 +583,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (access_error(vma, req)) goto invalid; - ret = handle_mm_fault(svm->mm, vma, address, + ret = handle_mm_fault(vma, address, req->wr_req ? FAULT_FLAG_WRITE : 0); if (ret & VM_FAULT_ERROR) goto invalid; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index ba764a0835d3..e23001bfcfee 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -420,8 +420,10 @@ retry: /* Try replenishing IOVAs by flushing rcache. 
*/ flushed_rcache = true; + preempt_disable(); for_each_online_cpu(cpu) free_cpu_cached_iovas(cpu, iovad); + preempt_enable(); goto retry; } @@ -749,7 +751,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, bool can_insert = false; unsigned long flags; - cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_full(cpu_rcache->loaded)) { @@ -779,6 +781,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, iova_magazine_push(cpu_rcache->loaded, iova_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); + put_cpu_ptr(rcache->cpu_rcaches); if (mag_to_free) { iova_magazine_free_pfns(mag_to_free, iovad); @@ -812,7 +815,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, bool has_pfn = false; unsigned long flags; - cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_empty(cpu_rcache->loaded)) { @@ -834,6 +837,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); + put_cpu_ptr(rcache->cpu_rcaches); return iova_pfn; } diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index fa33c50b0e5a..5495a5ba8039 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -8,6 +8,12 @@ config ARM_GIC select IRQ_DOMAIN_HIERARCHY select MULTI_IRQ_HANDLER +config ARM_GIC_PM + bool + depends on PM + select ARM_GIC + select PM_CLK + config ARM_GIC_MAX_NR int default 2 if ARCH_REALVIEW diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 38853a187607..4c203b6b8163 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_ARCH_SUNXI) += irq-sun4i.o obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi-nmi.o obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o irq-gic-common.o +obj-$(CONFIG_ARM_GIC_PM) += irq-gic-pm.o obj-$(CONFIG_REALVIEW_DT) += irq-gic-realview.o obj-$(CONFIG_ARM_GIC_V2M) += irq-gic-v2m.o obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-common.o @@ -69,3 +70,4 @@ obj-$(CONFIG_PIC32_EVIC) += irq-pic32-evic.o obj-$(CONFIG_MVEBU_ODMI) += irq-mvebu-odmi.o obj-$(CONFIG_LS_SCFG_MSI) += irq-ls-scfg-msi.o obj-$(CONFIG_EZNPS_GIC) += irq-eznps.o +obj-$(CONFIG_ARCH_ASPEED) += irq-aspeed-vic.o diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c index ead15be2d20a..b78a169c9c83 100644 --- a/drivers/irqchip/exynos-combiner.c +++ b/drivers/irqchip/exynos-combiner.c @@ -55,14 +55,14 @@ static void combiner_mask_irq(struct irq_data *data) { u32 mask = 1 << (data->hwirq % 32); - __raw_writel(mask, combiner_base(data) + COMBINER_ENABLE_CLEAR); + writel_relaxed(mask, combiner_base(data) + COMBINER_ENABLE_CLEAR); } static void combiner_unmask_irq(struct irq_data *data) { u32 mask = 1 << (data->hwirq % 32); - __raw_writel(mask, combiner_base(data) + COMBINER_ENABLE_SET); + writel_relaxed(mask, combiner_base(data) + COMBINER_ENABLE_SET); } static void combiner_handle_cascade_irq(struct irq_desc *desc) @@ -75,7 +75,7 @@ static void combiner_handle_cascade_irq(struct irq_desc *desc) chained_irq_enter(chip, desc); spin_lock(&irq_controller_lock); - status = __raw_readl(chip_data->base + COMBINER_INT_STATUS); + status = readl_relaxed(chip_data->base + COMBINER_INT_STATUS); spin_unlock(&irq_controller_lock); status &= 
chip_data->irq_mask; @@ -135,7 +135,7 @@ static void __init combiner_init_one(struct combiner_chip_data *combiner_data, combiner_data->parent_irq = irq; /* Disable all interrupts */ - __raw_writel(combiner_data->irq_mask, base + COMBINER_ENABLE_CLEAR); + writel_relaxed(combiner_data->irq_mask, base + COMBINER_ENABLE_CLEAR); } static int combiner_irq_domain_xlate(struct irq_domain *d, @@ -218,7 +218,7 @@ static int combiner_suspend(void) for (i = 0; i < max_nr; i++) combiner_data[i].pm_save = - __raw_readl(combiner_data[i].base + COMBINER_ENABLE_SET); + readl_relaxed(combiner_data[i].base + COMBINER_ENABLE_SET); return 0; } @@ -235,9 +235,9 @@ static void combiner_resume(void) int i; for (i = 0; i < max_nr; i++) { - __raw_writel(combiner_data[i].irq_mask, + writel_relaxed(combiner_data[i].irq_mask, combiner_data[i].base + COMBINER_ENABLE_CLEAR); - __raw_writel(combiner_data[i].pm_save, + writel_relaxed(combiner_data[i].pm_save, combiner_data[i].base + COMBINER_ENABLE_SET); } } diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index e7dc6cbda2a1..7c42b1d13faf 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -541,7 +541,7 @@ static void armada_370_xp_mpic_resume(void) writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); } -struct syscore_ops armada_370_xp_mpic_syscore_ops = { +static struct syscore_ops armada_370_xp_mpic_syscore_ops = { .suspend = armada_370_xp_mpic_suspend, .resume = armada_370_xp_mpic_resume, }; diff --git a/drivers/irqchip/irq-aspeed-vic.c b/drivers/irqchip/irq-aspeed-vic.c new file mode 100644 index 000000000000..d24451d5bf8a --- /dev/null +++ b/drivers/irqchip/irq-aspeed-vic.c @@ -0,0 +1,230 @@ +/* + * Copyright (C) 2015 - Ben Herrenschmidt, IBM Corp. + * + * Driver for Aspeed "new" VIC as found in SoC generation 3 and later + * + * Based on irq-vic.c: + * + * Copyright (C) 1999 - 2003 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* These definitions correspond to the "new mapping" of the + * register set that interleaves "high" and "low". 
The offsets + * below are for the "low" register, add 4 to get to the high one + */ +#define AVIC_IRQ_STATUS 0x00 +#define AVIC_FIQ_STATUS 0x08 +#define AVIC_RAW_STATUS 0x10 +#define AVIC_INT_SELECT 0x18 +#define AVIC_INT_ENABLE 0x20 +#define AVIC_INT_ENABLE_CLR 0x28 +#define AVIC_INT_TRIGGER 0x30 +#define AVIC_INT_TRIGGER_CLR 0x38 +#define AVIC_INT_SENSE 0x40 +#define AVIC_INT_DUAL_EDGE 0x48 +#define AVIC_INT_EVENT 0x50 +#define AVIC_EDGE_CLR 0x58 +#define AVIC_EDGE_STATUS 0x60 + +#define NUM_IRQS 64 + +struct aspeed_vic { + void __iomem *base; + u32 edge_sources[2]; + struct irq_domain *dom; +}; +static struct aspeed_vic *system_avic; + +static void vic_init_hw(struct aspeed_vic *vic) +{ + u32 sense; + + /* Disable all interrupts */ + writel(0xffffffff, vic->base + AVIC_INT_ENABLE_CLR); + writel(0xffffffff, vic->base + AVIC_INT_ENABLE_CLR + 4); + + /* Make sure no soft trigger is on */ + writel(0xffffffff, vic->base + AVIC_INT_TRIGGER_CLR); + writel(0xffffffff, vic->base + AVIC_INT_TRIGGER_CLR + 4); + + /* Set everything to be IRQ */ + writel(0, vic->base + AVIC_INT_SELECT); + writel(0, vic->base + AVIC_INT_SELECT + 4); + + /* Some interrupts have a programable high/low level trigger + * (4 GPIO direct inputs), for now we assume this was configured + * by firmware. We read which ones are edge now. + */ + sense = readl(vic->base + AVIC_INT_SENSE); + vic->edge_sources[0] = ~sense; + sense = readl(vic->base + AVIC_INT_SENSE + 4); + vic->edge_sources[1] = ~sense; + + /* Clear edge detection latches */ + writel(0xffffffff, vic->base + AVIC_EDGE_CLR); + writel(0xffffffff, vic->base + AVIC_EDGE_CLR + 4); +} + +static void __exception_irq_entry avic_handle_irq(struct pt_regs *regs) +{ + struct aspeed_vic *vic = system_avic; + u32 stat, irq; + + for (;;) { + irq = 0; + stat = readl_relaxed(vic->base + AVIC_IRQ_STATUS); + if (!stat) { + stat = readl_relaxed(vic->base + AVIC_IRQ_STATUS + 4); + irq = 32; + } + if (stat == 0) + break; + irq += ffs(stat) - 1; + handle_domain_irq(vic->dom, irq, regs); + } +} + +static void avic_ack_irq(struct irq_data *d) +{ + struct aspeed_vic *vic = irq_data_get_irq_chip_data(d); + unsigned int sidx = d->hwirq >> 5; + unsigned int sbit = 1u << (d->hwirq & 0x1f); + + /* Clear edge latch for edge interrupts, nop for level */ + if (vic->edge_sources[sidx] & sbit) + writel(sbit, vic->base + AVIC_EDGE_CLR + sidx * 4); +} + +static void avic_mask_irq(struct irq_data *d) +{ + struct aspeed_vic *vic = irq_data_get_irq_chip_data(d); + unsigned int sidx = d->hwirq >> 5; + unsigned int sbit = 1u << (d->hwirq & 0x1f); + + writel(sbit, vic->base + AVIC_INT_ENABLE_CLR + sidx * 4); +} + +static void avic_unmask_irq(struct irq_data *d) +{ + struct aspeed_vic *vic = irq_data_get_irq_chip_data(d); + unsigned int sidx = d->hwirq >> 5; + unsigned int sbit = 1u << (d->hwirq & 0x1f); + + writel(sbit, vic->base + AVIC_INT_ENABLE + sidx * 4); +} + +/* For level irq, faster than going through a nop "ack" and mask */ +static void avic_mask_ack_irq(struct irq_data *d) +{ + struct aspeed_vic *vic = irq_data_get_irq_chip_data(d); + unsigned int sidx = d->hwirq >> 5; + unsigned int sbit = 1u << (d->hwirq & 0x1f); + + /* First mask */ + writel(sbit, vic->base + AVIC_INT_ENABLE_CLR + sidx * 4); + + /* Then clear edge latch for edge interrupts */ + if (vic->edge_sources[sidx] & sbit) + writel(sbit, vic->base + AVIC_EDGE_CLR + sidx * 4); +} + +static struct irq_chip avic_chip = { + .name = "AVIC", + .irq_ack = avic_ack_irq, + .irq_mask = avic_mask_irq, + .irq_unmask = avic_unmask_irq, + .irq_mask_ack 
= avic_mask_ack_irq, +}; + +static int avic_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + struct aspeed_vic *vic = d->host_data; + unsigned int sidx = hwirq >> 5; + unsigned int sbit = 1u << (hwirq & 0x1f); + + /* Check if interrupt exists */ + if (sidx > 1) + return -EPERM; + + if (vic->edge_sources[sidx] & sbit) + irq_set_chip_and_handler(irq, &avic_chip, handle_edge_irq); + else + irq_set_chip_and_handler(irq, &avic_chip, handle_level_irq); + irq_set_chip_data(irq, vic); + irq_set_probe(irq); + return 0; +} + +static struct irq_domain_ops avic_dom_ops = { + .map = avic_map, + .xlate = irq_domain_xlate_onetwocell, +}; + +static int __init avic_of_init(struct device_node *node, + struct device_node *parent) +{ + void __iomem *regs; + struct aspeed_vic *vic; + + if (WARN(parent, "non-root Aspeed VIC not supported")) + return -EINVAL; + if (WARN(system_avic, "duplicate Aspeed VIC not supported")) + return -EINVAL; + + regs = of_iomap(node, 0); + if (WARN_ON(!regs)) + return -EIO; + + vic = kzalloc(sizeof(struct aspeed_vic), GFP_KERNEL); + if (WARN_ON(!vic)) { + iounmap(regs); + return -ENOMEM; + } + vic->base = regs; + + /* Initialize sources, all masked */ + vic_init_hw(vic); + + /* Ready to receive interrupts */ + system_avic = vic; + set_handle_irq(avic_handle_irq); + + /* Register our domain */ + vic->dom = irq_domain_add_simple(node, NUM_IRQS, 0, + &avic_dom_ops, vic); + + return 0; +} + +IRQCHIP_DECLARE(aspeed_new_vic, "aspeed,ast2400-vic", avic_of_init); diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c index bf9cc5f2e839..44d7c38dde47 100644 --- a/drivers/irqchip/irq-bcm2835.c +++ b/drivers/irqchip/irq-bcm2835.c @@ -52,7 +52,6 @@ #include #include -#include /* Put the bank and irq (32 bits) into the hwirq */ #define MAKE_HWIRQ(b, n) ((b << 5) | (n)) @@ -242,7 +241,7 @@ static void __exception_irq_entry bcm2835_handle_irq( u32 hwirq; while ((hwirq = get_next_armctrl_hwirq()) != ~0) - handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs); + handle_domain_irq(intc.domain, hwirq, regs); } static void bcm2836_chained_handle_irq(struct irq_desc *desc) diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c index 72ff1d5c5de6..df1949c0aa23 100644 --- a/drivers/irqchip/irq-bcm2836.c +++ b/drivers/irqchip/irq-bcm2836.c @@ -180,7 +180,7 @@ __exception_irq_entry bcm2836_arm_irqchip_handle_irq(struct pt_regs *regs) } else if (stat) { u32 hwirq = ffs(stat) - 1; - handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs); + handle_domain_irq(intc.domain, hwirq, regs); } } @@ -224,8 +224,8 @@ static struct notifier_block bcm2836_arm_irqchip_cpu_notifier = { }; #ifdef CONFIG_ARM -int __init bcm2836_smp_boot_secondary(unsigned int cpu, - struct task_struct *idle) +static int __init bcm2836_smp_boot_secondary(unsigned int cpu, + struct task_struct *idle) { unsigned long secondary_startup_phys = (unsigned long)virt_to_phys((void *)secondary_startup); diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c index 61b18ab33ad9..0ec92631e23c 100644 --- a/drivers/irqchip/irq-bcm7120-l2.c +++ b/drivers/irqchip/irq-bcm7120-l2.c @@ -215,7 +215,7 @@ static int __init bcm7120_l2_intc_iomap_3380(struct device_node *dn, return 0; } -int __init bcm7120_l2_intc_probe(struct device_node *dn, +static int __init bcm7120_l2_intc_probe(struct device_node *dn, struct device_node *parent, int (*iomap_regs_fn)(struct device_node *, struct bcm7120_l2_intc_data *), @@ -339,15 +339,15 @@ out_unmap: return ret; } -int __init 
bcm7120_l2_intc_probe_7120(struct device_node *dn, - struct device_node *parent) +static int __init bcm7120_l2_intc_probe_7120(struct device_node *dn, + struct device_node *parent) { return bcm7120_l2_intc_probe(dn, parent, bcm7120_l2_intc_iomap_7120, "BCM7120 L2"); } -int __init bcm7120_l2_intc_probe_3380(struct device_node *dn, - struct device_node *parent) +static int __init bcm7120_l2_intc_probe_3380(struct device_node *dn, + struct device_node *parent) { return bcm7120_l2_intc_probe(dn, parent, bcm7120_l2_intc_iomap_3380, "BCM3380 L2"); diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c index 65cd341f331a..1d4a5b46d9ae 100644 --- a/drivers/irqchip/irq-brcmstb-l2.c +++ b/drivers/irqchip/irq-brcmstb-l2.c @@ -112,8 +112,8 @@ static void brcmstb_l2_intc_resume(struct irq_data *d) irq_gc_unlock(gc); } -int __init brcmstb_l2_intc_of_init(struct device_node *np, - struct device_node *parent) +static int __init brcmstb_l2_intc_of_init(struct device_node *np, + struct device_node *parent) { unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; struct brcmstb_l2_intc_data *data; diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index 89e7423f0ebb..9ae71804b5dd 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -90,8 +90,8 @@ int gic_configure_irq(unsigned int irq, unsigned int type, return ret; } -void __init gic_dist_config(void __iomem *base, int gic_irqs, - void (*sync_access)(void)) +void gic_dist_config(void __iomem *base, int gic_irqs, + void (*sync_access)(void)) { unsigned int i; diff --git a/drivers/irqchip/irq-gic-pm.c b/drivers/irqchip/irq-gic-pm.c new file mode 100644 index 000000000000..4cbffba3ff13 --- /dev/null +++ b/drivers/irqchip/irq-gic-pm.c @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2016 NVIDIA CORPORATION, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct gic_clk_data { + unsigned int num_clocks; + const char *const *clocks; +}; + +static int gic_runtime_resume(struct device *dev) +{ + struct gic_chip_data *gic = dev_get_drvdata(dev); + int ret; + + ret = pm_clk_resume(dev); + if (ret) + return ret; + + /* + * On the very first resume, the pointer to the driver data + * will be NULL and this is intentional, because we do not + * want to restore the GIC on the very first resume. So if + * the pointer is not valid just return. 
+ */ + if (!gic) + return 0; + + gic_dist_restore(gic); + gic_cpu_restore(gic); + + return 0; +} + +static int gic_runtime_suspend(struct device *dev) +{ + struct gic_chip_data *gic = dev_get_drvdata(dev); + + gic_dist_save(gic); + gic_cpu_save(gic); + + return pm_clk_suspend(dev); +} + +static int gic_get_clocks(struct device *dev, const struct gic_clk_data *data) +{ + struct clk *clk; + unsigned int i; + int ret; + + if (!dev || !data) + return -EINVAL; + + ret = pm_clk_create(dev); + if (ret) + return ret; + + for (i = 0; i < data->num_clocks; i++) { + clk = of_clk_get_by_name(dev->of_node, data->clocks[i]); + if (IS_ERR(clk)) { + dev_err(dev, "failed to get clock %s\n", + data->clocks[i]); + ret = PTR_ERR(clk); + goto error; + } + + ret = pm_clk_add_clk(dev, clk); + if (ret) { + dev_err(dev, "failed to add clock at index %d\n", i); + clk_put(clk); + goto error; + } + } + + return 0; + +error: + pm_clk_destroy(dev); + + return ret; +} + +static int gic_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const struct gic_clk_data *data; + struct gic_chip_data *gic; + int ret, irq; + + data = of_device_get_match_data(&pdev->dev); + if (!data) { + dev_err(&pdev->dev, "no device match found\n"); + return -ENODEV; + } + + irq = irq_of_parse_and_map(dev->of_node, 0); + if (!irq) { + dev_err(dev, "no parent interrupt found!\n"); + return -EINVAL; + } + + ret = gic_get_clocks(dev, data); + if (ret) + goto irq_dispose; + + pm_runtime_enable(dev); + + ret = pm_runtime_get_sync(dev); + if (ret < 0) + goto rpm_disable; + + ret = gic_of_init_child(dev, &gic, irq); + if (ret) + goto rpm_put; + + platform_set_drvdata(pdev, gic); + + pm_runtime_put(dev); + + dev_info(dev, "GIC IRQ controller registered\n"); + + return 0; + +rpm_put: + pm_runtime_put_sync(dev); +rpm_disable: + pm_runtime_disable(dev); + pm_clk_destroy(dev); +irq_dispose: + irq_dispose_mapping(irq); + + return ret; +} + +static const struct dev_pm_ops gic_pm_ops = { + SET_RUNTIME_PM_OPS(gic_runtime_suspend, + gic_runtime_resume, NULL) +}; + +static const char * const gic400_clocks[] = { + "clk", +}; + +static const struct gic_clk_data gic400_data = { + .num_clocks = ARRAY_SIZE(gic400_clocks), + .clocks = gic400_clocks, +}; + +static const struct of_device_id gic_match[] = { + { .compatible = "nvidia,tegra210-agic", .data = &gic400_data }, + {}, +}; +MODULE_DEVICE_TABLE(of, gic_match); + +static struct platform_driver gic_driver = { + .probe = gic_probe, + .driver = { + .name = "gic", + .of_match_table = gic_match, + .pm = &gic_pm_ops, + } +}; + +builtin_platform_driver(gic_driver); diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index ad0d2960b664..35eb7ac5d21f 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * MSI_TYPER: diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 5eb1f9e17a98..7ceaba81efb4 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -56,13 +56,14 @@ struct its_collection { }; /* - * The ITS_BASER structure - contains memory information and cached - * value of BASER register configuration. + * The ITS_BASER structure - contains memory information, cached + * value of BASER register configuration and ITS page size. 
*/ struct its_baser { void *base; u64 val; u32 order; + u32 psz; }; /* @@ -824,180 +825,241 @@ static const char *its_base_type_string[] = { [GITS_BASER_TYPE_RESERVED7] = "Reserved (7)", }; -static void its_free_tables(struct its_node *its) +static u64 its_read_baser(struct its_node *its, struct its_baser *baser) { - int i; + u32 idx = baser - its->tables; - for (i = 0; i < GITS_BASER_NR_REGS; i++) { - if (its->tables[i].base) { - free_pages((unsigned long)its->tables[i].base, - its->tables[i].order); - its->tables[i].base = NULL; - } - } + return readq_relaxed(its->base + GITS_BASER + (idx << 3)); } -static int its_alloc_tables(const char *node_name, struct its_node *its) +static void its_write_baser(struct its_node *its, struct its_baser *baser, + u64 val) { - int err; - int i; - int psz = SZ_64K; - u64 shr = GITS_BASER_InnerShareable; - u64 cache; - u64 typer; - u32 ids; + u32 idx = baser - its->tables; - if (its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_22375) { - /* - * erratum 22375: only alloc 8MB table size - * erratum 24313: ignore memory access type - */ - cache = 0; - ids = 0x14; /* 20 bits, 8MB */ - } else { - cache = GITS_BASER_WaWb; - typer = readq_relaxed(its->base + GITS_TYPER); - ids = GITS_TYPER_DEVBITS(typer); + writeq_relaxed(val, its->base + GITS_BASER + (idx << 3)); + baser->val = its_read_baser(its, baser); +} + +static int its_setup_baser(struct its_node *its, struct its_baser *baser, + u64 cache, u64 shr, u32 psz, u32 order, + bool indirect) +{ + u64 val = its_read_baser(its, baser); + u64 esz = GITS_BASER_ENTRY_SIZE(val); + u64 type = GITS_BASER_TYPE(val); + u32 alloc_pages; + void *base; + u64 tmp; + +retry_alloc_baser: + alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz); + if (alloc_pages > GITS_BASER_PAGES_MAX) { + pr_warn("ITS@%pa: %s too large, reduce ITS pages %u->%u\n", + &its->phys_base, its_base_type_string[type], + alloc_pages, GITS_BASER_PAGES_MAX); + alloc_pages = GITS_BASER_PAGES_MAX; + order = get_order(GITS_BASER_PAGES_MAX * psz); } - its->device_ids = ids; + base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!base) + return -ENOMEM; - for (i = 0; i < GITS_BASER_NR_REGS; i++) { - u64 val = readq_relaxed(its->base + GITS_BASER + i * 8); - u64 type = GITS_BASER_TYPE(val); - u64 entry_size = GITS_BASER_ENTRY_SIZE(val); - int order = get_order(psz); - int alloc_pages; - u64 tmp; - void *base; +retry_baser: + val = (virt_to_phys(base) | + (type << GITS_BASER_TYPE_SHIFT) | + ((esz - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | + ((alloc_pages - 1) << GITS_BASER_PAGES_SHIFT) | + cache | + shr | + GITS_BASER_VALID); + + val |= indirect ? GITS_BASER_INDIRECT : 0x0; + + switch (psz) { + case SZ_4K: + val |= GITS_BASER_PAGE_SIZE_4K; + break; + case SZ_16K: + val |= GITS_BASER_PAGE_SIZE_16K; + break; + case SZ_64K: + val |= GITS_BASER_PAGE_SIZE_64K; + break; + } - if (type == GITS_BASER_TYPE_NONE) - continue; + its_write_baser(its, baser, val); + tmp = baser->val; + if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) { /* - * Allocate as many entries as required to fit the - * range of device IDs that the ITS can grok... The ID - * space being incredibly sparse, this results in a - * massive waste of memory. - * - * For other tables, only allocate a single page. + * Shareability didn't stick. Just use + * whatever the read reported, which is likely + * to be the only thing this redistributor + * supports. If that's zero, make it + * non-cacheable as well. 
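The retry logic here is a common MMIO negotiation idiom: write the attributes you want, read the register back, and adopt whatever the hardware actually latched, downgrading (in this case to non-cacheable) when a field refuses to stick. A reduced model, assuming a hypothetical register whose shareability field is write-ignored (this is not the real GITS_BASER layout):

#include <stdint.h>
#include <stdio.h>

#define SHR_MASK 0x3u			/* stand-in shareability field */

static uint32_t mmio;			/* models the MMIO register */

static void reg_write(uint32_t v) { mmio = v & ~SHR_MASK; }	/* hw ignores SHR */
static uint32_t reg_read(void) { return mmio; }

int main(void)
{
	uint32_t val = 0x40u | 0x2u;	/* desired config plus shareable bits */
	uint32_t tmp;

	reg_write(val);
	tmp = reg_read();
	if ((val ^ tmp) & SHR_MASK) {
		/* Shareability didn't stick: keep what the hw reports. */
		val = (val & ~SHR_MASK) | (tmp & SHR_MASK);
		reg_write(val);
	}
	printf("negotiated value: %#x\n", reg_read());
	return 0;
}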
*/ - if (type == GITS_BASER_TYPE_DEVICE) { - /* - * 'order' was initialized earlier to the default page - * granule of the ITS. We can't have an allocation - * smaller than that. If the requested allocation - * is smaller, round up to the default page granule. - */ - order = max(get_order((1UL << ids) * entry_size), - order); - if (order >= MAX_ORDER) { - order = MAX_ORDER - 1; - pr_warn("%s: Device Table too large, reduce its page order to %u\n", - node_name, order); - } - } - -retry_alloc_baser: - alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz); - if (alloc_pages > GITS_BASER_PAGES_MAX) { - alloc_pages = GITS_BASER_PAGES_MAX; - order = get_order(GITS_BASER_PAGES_MAX * psz); - pr_warn("%s: Device Table too large, reduce its page order to %u (%u pages)\n", - node_name, order, alloc_pages); + shr = tmp & GITS_BASER_SHAREABILITY_MASK; + if (!shr) { + cache = GITS_BASER_nC; + __flush_dcache_area(base, PAGE_ORDER_TO_SIZE(order)); } + goto retry_baser; + } - base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!base) { - err = -ENOMEM; - goto out_free; - } - - its->tables[i].base = base; - its->tables[i].order = order; - -retry_baser: - val = (virt_to_phys(base) | - (type << GITS_BASER_TYPE_SHIFT) | - ((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | - cache | - shr | - GITS_BASER_VALID); + if ((val ^ tmp) & GITS_BASER_PAGE_SIZE_MASK) { + /* + * Page size didn't stick. Let's try a smaller + * size and retry. If we reach 4K, then + * something is horribly wrong... + */ + free_pages((unsigned long)base, order); + baser->base = NULL; switch (psz) { - case SZ_4K: - val |= GITS_BASER_PAGE_SIZE_4K; - break; case SZ_16K: - val |= GITS_BASER_PAGE_SIZE_16K; - break; + psz = SZ_4K; + goto retry_alloc_baser; case SZ_64K: - val |= GITS_BASER_PAGE_SIZE_64K; - break; + psz = SZ_16K; + goto retry_alloc_baser; } + } + + if (val != tmp) { + pr_err("ITS@%pa: %s doesn't stick: %lx %lx\n", + &its->phys_base, its_base_type_string[type], + (unsigned long) val, (unsigned long) tmp); + free_pages((unsigned long)base, order); + return -ENXIO; + } - val |= alloc_pages - 1; - its->tables[i].val = val; + baser->order = order; + baser->base = base; + baser->psz = psz; + tmp = indirect ? GITS_LVL1_ENTRY_SIZE : esz; - writeq_relaxed(val, its->base + GITS_BASER + i * 8); - tmp = readq_relaxed(its->base + GITS_BASER + i * 8); + pr_info("ITS@%pa: allocated %d %s @%lx (%s, esz %d, psz %dK, shr %d)\n", + &its->phys_base, (int)(PAGE_ORDER_TO_SIZE(order) / tmp), + its_base_type_string[type], + (unsigned long)virt_to_phys(base), + indirect ? "indirect" : "flat", (int)esz, + psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT); - if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) { + return 0; +} + +static bool its_parse_baser_device(struct its_node *its, struct its_baser *baser, + u32 psz, u32 *order) +{ + u64 esz = GITS_BASER_ENTRY_SIZE(its_read_baser(its, baser)); + u64 val = GITS_BASER_InnerShareable | GITS_BASER_WaWb; + u32 ids = its->device_ids; + u32 new_order = *order; + bool indirect = false; + + /* No need to enable Indirection if memory requirement is < (psz * 2) bytes */ + if ((esz << ids) > (psz * 2)) { + /* + * Find out whether hw supports a single or two-level table + * by reading bit at offset '62' after writing '1' to it. + */ + its_write_baser(its, baser, val | GITS_BASER_INDIRECT); + indirect = !!(baser->val & GITS_BASER_INDIRECT); + + if (indirect) { /* - * Shareability didn't stick.
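The sizing math in its_parse_baser_device() is worth unpacking before the comment resumes: one lvl2 page of psz bytes holds psz/esz entries, so it resolves ilog2(psz/esz) low bits of the device ID, and the lvl1 table then only needs entries for the remaining ID bits. A quick arithmetic check with illustrative numbers (esz = 8, psz = 64K, ids = 20; not taken from any particular ITS):

#include <stdio.h>

static unsigned int ilog2u(unsigned long v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned long esz = 8;			/* bytes per device-table entry */
	unsigned long psz = 64 * 1024;		/* ITS page size */
	unsigned int ids = 20;			/* device ID bits */
	unsigned long lvl1_esz = 8;		/* bytes per lvl1 entry */

	unsigned long flat = esz << ids;	/* single-level table: 8MB */
	unsigned int lvl2_bits = ilog2u(psz / esz);
	unsigned long lvl1 = lvl1_esz << (ids - lvl2_bits);	/* 1KB */

	printf("flat=%lu bytes, lvl1=%lu bytes plus %lu-byte lvl2 pages on demand\n",
	       flat, lvl1, psz);
	return 0;
}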
Just use - * whatever the read reported, which is likely - * to be the only thing this redistributor - * supports. If that's zero, make it - * non-cacheable as well. + * The size of the lvl2 table is equal to ITS page size + * which is 'psz'. For computing lvl1 table size, + * subtract ID bits that sparse lvl2 table from 'ids' + * which is reported by ITS hardware times lvl1 table + * entry size. */ - shr = tmp & GITS_BASER_SHAREABILITY_MASK; - if (!shr) { - cache = GITS_BASER_nC; - __flush_dcache_area(base, PAGE_ORDER_TO_SIZE(order)); - } - goto retry_baser; + ids -= ilog2(psz / esz); + esz = GITS_LVL1_ENTRY_SIZE; } + } - if ((val ^ tmp) & GITS_BASER_PAGE_SIZE_MASK) { - /* - * Page size didn't stick. Let's try a smaller - * size and retry. If we reach 4K, then - * something is horribly wrong... - */ - free_pages((unsigned long)base, order); - its->tables[i].base = NULL; + /* + * Allocate as many entries as required to fit the + * range of device IDs that the ITS can grok... The ID + * space being incredibly sparse, this results in a + * massive waste of memory if two-level device table + * feature is not supported by hardware. + */ + new_order = max_t(u32, get_order(esz << ids), new_order); + if (new_order >= MAX_ORDER) { + new_order = MAX_ORDER - 1; + ids = ilog2(PAGE_ORDER_TO_SIZE(new_order) / esz); + pr_warn("ITS@%pa: Device Table too large, reduce ids %u->%u\n", + &its->phys_base, its->device_ids, ids); + } - switch (psz) { - case SZ_16K: - psz = SZ_4K; - goto retry_alloc_baser; - case SZ_64K: - psz = SZ_16K; - goto retry_alloc_baser; - } - } + *order = new_order; + + return indirect; +} - if (val != tmp) { - pr_err("ITS: %s: GITS_BASER%d doesn't stick: %lx %lx\n", - node_name, i, - (unsigned long) val, (unsigned long) tmp); - err = -ENXIO; - goto out_free; +static void its_free_tables(struct its_node *its) +{ + int i; + + for (i = 0; i < GITS_BASER_NR_REGS; i++) { + if (its->tables[i].base) { + free_pages((unsigned long)its->tables[i].base, + its->tables[i].order); + its->tables[i].base = NULL; } + } +} + +static int its_alloc_tables(struct its_node *its) +{ + u64 typer = readq_relaxed(its->base + GITS_TYPER); + u32 ids = GITS_TYPER_DEVBITS(typer); + u64 shr = GITS_BASER_InnerShareable; + u64 cache = GITS_BASER_WaWb; + u32 psz = SZ_64K; + int err, i; - pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n", - (int)(PAGE_ORDER_TO_SIZE(order) / entry_size), - its_base_type_string[type], - (unsigned long)virt_to_phys(base), - psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT); + if (its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_22375) { + /* + * erratum 22375: only alloc 8MB table size + * erratum 24313: ignore memory access type + */ + cache = GITS_BASER_nCnB; + ids = 0x14; /* 20 bits, 8MB */ } - return 0; + its->device_ids = ids; -out_free: - its_free_tables(its); + for (i = 0; i < GITS_BASER_NR_REGS; i++) { + struct its_baser *baser = its->tables + i; + u64 val = its_read_baser(its, baser); + u64 type = GITS_BASER_TYPE(val); + u32 order = get_order(psz); + bool indirect = false; - return err; + if (type == GITS_BASER_TYPE_NONE) + continue; + + if (type == GITS_BASER_TYPE_DEVICE) + indirect = its_parse_baser_device(its, baser, psz, &order); + + err = its_setup_baser(its, baser, cache, shr, psz, order, indirect); + if (err < 0) { + its_free_tables(its); + return err; + } + + /* Update settings which will be used for next BASERn */ + psz = baser->psz; + cache = baser->val & GITS_BASER_CACHEABILITY_MASK; + shr = baser->val & GITS_BASER_SHAREABILITY_MASK; + } + + return 0; } static int 
its_alloc_collections(struct its_node *its) @@ -1185,10 +1247,57 @@ static struct its_baser *its_get_baser(struct its_node *its, u32 type) return NULL; } +static bool its_alloc_device_table(struct its_node *its, u32 dev_id) +{ + struct its_baser *baser; + struct page *page; + u32 esz, idx; + __le64 *table; + + baser = its_get_baser(its, GITS_BASER_TYPE_DEVICE); + + /* Don't allow device id that exceeds ITS hardware limit */ + if (!baser) + return (ilog2(dev_id) < its->device_ids); + + /* Don't allow device id that exceeds single, flat table limit */ + esz = GITS_BASER_ENTRY_SIZE(baser->val); + if (!(baser->val & GITS_BASER_INDIRECT)) + return (dev_id < (PAGE_ORDER_TO_SIZE(baser->order) / esz)); + + /* Compute 1st level table index & check if that exceeds table limit */ + idx = dev_id >> ilog2(baser->psz / esz); + if (idx >= (PAGE_ORDER_TO_SIZE(baser->order) / GITS_LVL1_ENTRY_SIZE)) + return false; + + table = baser->base; + + /* Allocate memory for 2nd level table */ + if (!table[idx]) { + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(baser->psz)); + if (!page) + return false; + + /* Flush Lvl2 table to PoC if hw doesn't support coherency */ + if (!(baser->val & GITS_BASER_SHAREABILITY_MASK)) + __flush_dcache_area(page_address(page), baser->psz); + + table[idx] = cpu_to_le64(page_to_phys(page) | GITS_BASER_VALID); + + /* Flush Lvl1 entry to PoC if hw doesn't support coherency */ + if (!(baser->val & GITS_BASER_SHAREABILITY_MASK)) + __flush_dcache_area(table + idx, GITS_LVL1_ENTRY_SIZE); + + /* Ensure updated table contents are visible to ITS hardware */ + dsb(sy); + } + + return true; +} + static struct its_device *its_create_device(struct its_node *its, u32 dev_id, int nvecs) { - struct its_baser *baser; struct its_device *dev; unsigned long *lpi_map; unsigned long flags; @@ -1199,14 +1308,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, int nr_ites; int sz; - baser = its_get_baser(its, GITS_BASER_TYPE_DEVICE); - - /* Don't allow 'dev_id' that exceeds single, flat table limit */ - if (baser) { - if (dev_id >= (PAGE_ORDER_TO_SIZE(baser->order) / - GITS_BASER_ENTRY_SIZE(baser->val))) - return NULL; - } else if (ilog2(dev_id) >= its->device_ids) + if (!its_alloc_device_table(its, dev_id)) return NULL; dev = kzalloc(sizeof(*dev), GFP_KERNEL); @@ -1569,7 +1671,7 @@ static int __init its_probe(struct device_node *node, its_enable_quirks(its); - err = its_alloc_tables(node->full_name, its); + err = its_alloc_tables(its); if (err) goto out_free_cmd; diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index fbc4ae2afd29..1de07eb5839c 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -75,7 +75,7 @@ struct gic_chip_data { void __iomem *raw_dist_base; void __iomem *raw_cpu_base; u32 percpu_offset; -#ifdef CONFIG_CPU_PM +#if defined(CONFIG_CPU_PM) || defined(CONFIG_ARM_GIC_PM) u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)]; u32 saved_spi_active[DIV_ROUND_UP(1020, 32)]; u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)]; @@ -449,7 +449,7 @@ static void gic_cpu_if_up(struct gic_chip_data *gic) } -static void __init gic_dist_init(struct gic_chip_data *gic) +static void gic_dist_init(struct gic_chip_data *gic) { unsigned int i; u32 cpumask; @@ -528,14 +528,14 @@ int gic_cpu_if_down(unsigned int gic_nr) return 0; } -#ifdef CONFIG_CPU_PM +#if defined(CONFIG_CPU_PM) || defined(CONFIG_ARM_GIC_PM) /* * Saves the GIC distributor registers during suspend or idle. Must be called * with interrupts disabled but before powering down the GIC. 
After calling * this function, no interrupts will be delivered by the GIC, and another * platform-specific wakeup source must be enabled. */ -static void gic_dist_save(struct gic_chip_data *gic) +void gic_dist_save(struct gic_chip_data *gic) { unsigned int gic_irqs; void __iomem *dist_base; @@ -574,7 +574,7 @@ static void gic_dist_save(struct gic_chip_data *gic) * handled normally, but any edge interrupts that occurred will not be seen by * the GIC and need to be handled by the platform-specific wakeup source. */ -static void gic_dist_restore(struct gic_chip_data *gic) +void gic_dist_restore(struct gic_chip_data *gic) { unsigned int gic_irqs; unsigned int i; @@ -620,7 +620,7 @@ static void gic_dist_restore(struct gic_chip_data *gic) writel_relaxed(GICD_ENABLE, dist_base + GIC_DIST_CTRL); } -static void gic_cpu_save(struct gic_chip_data *gic) +void gic_cpu_save(struct gic_chip_data *gic) { int i; u32 *ptr; @@ -650,7 +650,7 @@ static void gic_cpu_save(struct gic_chip_data *gic) } -static void gic_cpu_restore(struct gic_chip_data *gic) +void gic_cpu_restore(struct gic_chip_data *gic) { int i; u32 *ptr; @@ -727,7 +727,7 @@ static struct notifier_block gic_notifier_block = { .notifier_call = gic_notifier, }; -static int __init gic_pm_init(struct gic_chip_data *gic) +static int gic_pm_init(struct gic_chip_data *gic) { gic->saved_ppi_enable = __alloc_percpu(DIV_ROUND_UP(32, 32) * 4, sizeof(u32)); @@ -757,7 +757,7 @@ free_ppi_enable: return -ENOMEM; } #else -static int __init gic_pm_init(struct gic_chip_data *gic) +static int gic_pm_init(struct gic_chip_data *gic) { return 0; } @@ -1032,32 +1032,31 @@ static const struct irq_domain_ops gic_irq_domain_ops = { .unmap = gic_irq_domain_unmap, }; -static int __init __gic_init_bases(struct gic_chip_data *gic, int irq_start, - struct fwnode_handle *handle) +static void gic_init_chip(struct gic_chip_data *gic, struct device *dev, + const char *name, bool use_eoimode1) { - irq_hw_number_t hwirq_base; - int gic_irqs, irq_base, i, ret; - - if (WARN_ON(!gic || gic->domain)) - return -EINVAL; - /* Initialize irq_chip */ gic->chip = gic_chip; + gic->chip.name = name; + gic->chip.parent_device = dev; - if (static_key_true(&supports_deactivate) && gic == &gic_data[0]) { + if (use_eoimode1) { gic->chip.irq_mask = gic_eoimode1_mask_irq; gic->chip.irq_eoi = gic_eoimode1_eoi_irq; gic->chip.irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity; - gic->chip.name = kasprintf(GFP_KERNEL, "GICv2"); - } else { - gic->chip.name = kasprintf(GFP_KERNEL, "GIC-%d", - (int)(gic - &gic_data[0])); } #ifdef CONFIG_SMP if (gic == &gic_data[0]) gic->chip.irq_set_affinity = gic_set_affinity; #endif +} + +static int gic_init_bases(struct gic_chip_data *gic, int irq_start, + struct fwnode_handle *handle) +{ + irq_hw_number_t hwirq_base; + int gic_irqs, irq_base, ret; if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) { /* Franken-GIC without banked registers...
*/ @@ -1138,6 +1137,36 @@ static int __init __gic_init_bases(struct gic_chip_data *gic, int irq_start, goto error; } + gic_dist_init(gic); + ret = gic_cpu_init(gic); + if (ret) + goto error; + + ret = gic_pm_init(gic); + if (ret) + goto error; + + return 0; + +error: + if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) { + free_percpu(gic->dist_base.percpu_base); + free_percpu(gic->cpu_base.percpu_base); + } + + return ret; +} + +static int __init __gic_init_bases(struct gic_chip_data *gic, + int irq_start, + struct fwnode_handle *handle) +{ + char *name; + int i, ret; + + if (WARN_ON(!gic || gic->domain)) + return -EINVAL; + if (gic == &gic_data[0]) { /* * Initialize the CPU interface map to all CPUs. @@ -1155,24 +1184,17 @@ static int __init __gic_init_bases(struct gic_chip_data *gic, int irq_start, pr_info("GIC: Using split EOI/Deactivate mode\n"); } - gic_dist_init(gic); - ret = gic_cpu_init(gic); - if (ret) - goto error; - - ret = gic_pm_init(gic); - if (ret) - goto error; - - return 0; - -error: - if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) { - free_percpu(gic->dist_base.percpu_base); - free_percpu(gic->cpu_base.percpu_base); + if (static_key_true(&supports_deactivate) && gic == &gic_data[0]) { + name = kasprintf(GFP_KERNEL, "GICv2"); + gic_init_chip(gic, NULL, name, true); + } else { + name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0])); + gic_init_chip(gic, NULL, name, false); } - kfree(gic->chip.name); + ret = gic_init_bases(gic, irq_start, handle); + if (ret) + kfree(name); return ret; } @@ -1250,7 +1272,7 @@ static bool gic_check_eoimode(struct device_node *node, void __iomem **base) return true; } -static int __init gic_of_setup(struct gic_chip_data *gic, struct device_node *node) +static int gic_of_setup(struct gic_chip_data *gic, struct device_node *node) { if (!gic || !node) return -EINVAL; @@ -1274,6 +1296,34 @@ error: return -ENOMEM; } +int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq) +{ + int ret; + + if (!dev || !dev->of_node || !gic || !irq) + return -EINVAL; + + *gic = devm_kzalloc(dev, sizeof(**gic), GFP_KERNEL); + if (!*gic) + return -ENOMEM; + + gic_init_chip(*gic, dev, dev->of_node->name, false); + + ret = gic_of_setup(*gic, dev->of_node); + if (ret) + return ret; + + ret = gic_init_bases(*gic, -1, &dev->of_node->fwnode); + if (ret) { + gic_teardown(*gic); + return ret; + } + + irq_set_chained_handler_and_data(irq, gic_handle_cascade_irq, *gic); + + return 0; +} + static void __init gic_of_setup_kvm_info(struct device_node *node) { int ret; @@ -1353,7 +1403,11 @@ IRQCHIP_DECLARE(cortex_a7_gic, "arm,cortex-a7-gic", gic_of_init); IRQCHIP_DECLARE(msm_8660_qgic, "qcom,msm-8660-qgic", gic_of_init); IRQCHIP_DECLARE(msm_qgic2, "qcom,msm-qgic2", gic_of_init); IRQCHIP_DECLARE(pl390, "arm,pl390", gic_of_init); - +#else +int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq) +{ + return -ENOTSUPP; +} #endif #ifdef CONFIG_ACPI diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 3b5e10aa48ab..3786d0f21972 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -718,7 +718,7 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq, spin_lock_irqsave(&gic_lock, flags); gic_map_to_pin(intr, gic_cpu_pin); - gic_map_to_vpe(intr, vpe); + gic_map_to_vpe(intr, mips_cm_vp_id(vpe)); for (i = 0; i < min(gic_vpes, NR_CPUS); i++) clear_bit(intr, pcpu_masks[i].pcpu_mask); set_bit(intr, pcpu_masks[vpe].pcpu_mask); @@ 
-746,6 +746,12 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq, /* verify that it doesn't conflict with an IPI irq */ if (test_bit(spec->hwirq, ipi_resrv)) return -EBUSY; + + hwirq = GIC_SHARED_TO_HWIRQ(spec->hwirq); + + return irq_domain_set_hwirq_and_chip(d, virq, hwirq, + &gic_level_irq_controller, + NULL); } else { base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs); if (base_hwirq == gic_shared_intrs) { @@ -867,10 +873,14 @@ static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq, &gic_level_irq_controller, NULL); if (ret) - return ret; + goto error; } return 0; + +error: + irq_domain_free_irqs_parent(d, virq, nr_irqs); + return ret; } void gic_dev_domain_free(struct irq_domain *d, unsigned int virq, @@ -949,7 +959,7 @@ int gic_ipi_domain_match(struct irq_domain *d, struct device_node *node, switch (bus_token) { case DOMAIN_BUS_IPI: is_ipi = d->bus_token == bus_token; - return to_of_node(d->fwnode) == node && is_ipi; + return (!node || to_of_node(d->fwnode) == node) && is_ipi; break; default: return 0; @@ -1032,12 +1042,14 @@ static void __init __gic_init(unsigned long gic_base_addr, &gic_irq_domain_ops, NULL); if (!gic_irq_domain) panic("Failed to add GIC IRQ domain"); + gic_irq_domain->name = "mips-gic-irq"; gic_dev_domain = irq_domain_add_hierarchy(gic_irq_domain, 0, GIC_NUM_LOCAL_INTRS + gic_shared_intrs, node, &gic_dev_domain_ops, NULL); if (!gic_dev_domain) panic("Failed to add GIC DEV domain"); + gic_dev_domain->name = "mips-gic-dev"; gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain, IRQ_DOMAIN_FLAG_IPI_PER_CPU, @@ -1046,6 +1058,7 @@ static void __init __gic_init(unsigned long gic_base_addr, if (!gic_ipi_domain) panic("Failed to add GIC IPI domain"); + gic_ipi_domain->name = "mips-gic-ipi"; gic_ipi_domain->bus_token = DOMAIN_BUS_IPI; if (node && diff --git a/drivers/irqchip/irq-omap-intc.c b/drivers/irqchip/irq-omap-intc.c index 9d1bcfc33e4c..b04a8ac6e744 100644 --- a/drivers/irqchip/irq-omap-intc.c +++ b/drivers/irqchip/irq-omap-intc.c @@ -23,6 +23,8 @@ #include #include +#include + /* Define these here for now until we drop all board-files */ #define OMAP24XX_IC_BASE 0x480fe000 #define OMAP34XX_IC_BASE 0x48200000 diff --git a/drivers/irqchip/irq-s3c24xx.c b/drivers/irqchip/irq-s3c24xx.c index 5dc5a760c723..c25ce5af091a 100644 --- a/drivers/irqchip/irq-s3c24xx.c +++ b/drivers/irqchip/irq-s3c24xx.c @@ -92,9 +92,9 @@ static void s3c_irq_mask(struct irq_data *data) unsigned long mask; unsigned int irqno; - mask = __raw_readl(intc->reg_mask); + mask = readl_relaxed(intc->reg_mask); mask |= (1UL << irq_data->offset); - __raw_writel(mask, intc->reg_mask); + writel_relaxed(mask, intc->reg_mask); if (parent_intc) { parent_data = &parent_intc->irqs[irq_data->parent_irq]; @@ -119,9 +119,9 @@ static void s3c_irq_unmask(struct irq_data *data) unsigned long mask; unsigned int irqno; - mask = __raw_readl(intc->reg_mask); + mask = readl_relaxed(intc->reg_mask); mask &= ~(1UL << irq_data->offset); - __raw_writel(mask, intc->reg_mask); + writel_relaxed(mask, intc->reg_mask); if (parent_intc) { irqno = irq_find_mapping(parent_intc->domain, @@ -136,9 +136,9 @@ static inline void s3c_irq_ack(struct irq_data *data) struct s3c_irq_intc *intc = irq_data->intc; unsigned long bitval = 1UL << irq_data->offset; - __raw_writel(bitval, intc->reg_pending); + writel_relaxed(bitval, intc->reg_pending); if (intc->reg_intpnd) - __raw_writel(bitval, intc->reg_intpnd); + writel_relaxed(bitval, intc->reg_intpnd); } static int s3c_irq_type(struct irq_data 
*data, unsigned int type) @@ -172,9 +172,9 @@ static int s3c_irqext_type_set(void __iomem *gpcon_reg, unsigned long newvalue = 0, value; /* Set the GPIO to external interrupt mode */ - value = __raw_readl(gpcon_reg); + value = readl_relaxed(gpcon_reg); value = (value & ~(3 << gpcon_offset)) | (0x02 << gpcon_offset); - __raw_writel(value, gpcon_reg); + writel_relaxed(value, gpcon_reg); /* Set the external interrupt to pointed trigger type */ switch (type) @@ -208,9 +208,9 @@ static int s3c_irqext_type_set(void __iomem *gpcon_reg, return -EINVAL; } - value = __raw_readl(extint_reg); + value = readl_relaxed(extint_reg); value = (value & ~(7 << extint_offset)) | (newvalue << extint_offset); - __raw_writel(value, extint_reg); + writel_relaxed(value, extint_reg); return 0; } @@ -315,8 +315,8 @@ static void s3c_irq_demux(struct irq_desc *desc) chained_irq_enter(chip, desc); - src = __raw_readl(sub_intc->reg_pending); - msk = __raw_readl(sub_intc->reg_mask); + src = readl_relaxed(sub_intc->reg_pending); + msk = readl_relaxed(sub_intc->reg_mask); src &= ~msk; src &= irq_data->sub_bits; @@ -337,7 +337,7 @@ static inline int s3c24xx_handle_intc(struct s3c_irq_intc *intc, int pnd; int offset; - pnd = __raw_readl(intc->reg_intpnd); + pnd = readl_relaxed(intc->reg_intpnd); if (!pnd) return false; @@ -352,7 +352,7 @@ static inline int s3c24xx_handle_intc(struct s3c_irq_intc *intc, * * Thanks to Klaus, Shannon, et al for helping to debug this problem */ - offset = __raw_readl(intc->reg_intpnd + 4); + offset = readl_relaxed(intc->reg_intpnd + 4); /* Find the bit manually, when the offset is wrong. * The pending register only ever contains the one bit of the next @@ -406,7 +406,7 @@ int s3c24xx_set_fiq(unsigned int irq, bool on) intmod = 0; } - __raw_writel(intmod, S3C2410_INTMOD); + writel_relaxed(intmod, S3C2410_INTMOD); return 0; } @@ -508,14 +508,14 @@ static void s3c24xx_clear_intc(struct s3c_irq_intc *intc) last = 0; for (i = 0; i < 4; i++) { - pend = __raw_readl(reg_source); + pend = readl_relaxed(reg_source); if (pend == 0 || pend == last) break; - __raw_writel(pend, intc->reg_pending); + writel_relaxed(pend, intc->reg_pending); if (intc->reg_intpnd) - __raw_writel(pend, intc->reg_intpnd); + writel_relaxed(pend, intc->reg_intpnd); pr_info("irq: clearing pending status %08x\n", (int)pend); last = pend; diff --git a/drivers/irqchip/irq-sirfsoc.c b/drivers/irqchip/irq-sirfsoc.c index 10cb21b9ba3d..e1336848affa 100644 --- a/drivers/irqchip/irq-sirfsoc.c +++ b/drivers/irqchip/irq-sirfsoc.c @@ -29,6 +29,11 @@ static struct irq_domain *sirfsoc_irqdomain; +static void __iomem *sirfsoc_irq_get_regbase(void) +{ + return (void __iomem __force *)sirfsoc_irqdomain->host_data; +} + static __init void sirfsoc_alloc_gc(void __iomem *base) { unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; @@ -53,7 +58,7 @@ static __init void sirfsoc_alloc_gc(void __iomem *base) static void __exception_irq_entry sirfsoc_handle_irq(struct pt_regs *regs) { - void __iomem *base = sirfsoc_irqdomain->host_data; + void __iomem *base = sirfsoc_irq_get_regbase(); u32 irqstat; irqstat = readl_relaxed(base + SIRFSOC_INIT_IRQ_ID); @@ -94,7 +99,7 @@ static struct sirfsoc_irq_status sirfsoc_irq_st; static int sirfsoc_irq_suspend(void) { - void __iomem *base = sirfsoc_irqdomain->host_data; + void __iomem *base = sirfsoc_irq_get_regbase(); sirfsoc_irq_st.mask0 = readl_relaxed(base + SIRFSOC_INT_RISC_MASK0); sirfsoc_irq_st.mask1 = readl_relaxed(base + SIRFSOC_INT_RISC_MASK1); @@ -106,7 +111,7 @@ static int sirfsoc_irq_suspend(void) 
static void sirfsoc_irq_resume(void) { - void __iomem *base = sirfsoc_irqdomain->host_data; + void __iomem *base = sirfsoc_irq_get_regbase(); writel_relaxed(sirfsoc_irq_st.mask0, base + SIRFSOC_INT_RISC_MASK0); writel_relaxed(sirfsoc_irq_st.mask1, base + SIRFSOC_INT_RISC_MASK1); diff --git a/drivers/irqchip/irq-tegra.c b/drivers/irqchip/irq-tegra.c index e902f081e16c..3973a14bb15b 100644 --- a/drivers/irqchip/irq-tegra.c +++ b/drivers/irqchip/irq-tegra.c @@ -90,7 +90,7 @@ static struct tegra_ictlr_info *lic; static inline void tegra_ictlr_write_mask(struct irq_data *d, unsigned long reg) { - void __iomem *base = d->chip_data; + void __iomem *base = (void __iomem __force *)d->chip_data; u32 mask; mask = BIT(d->hwirq % 32); @@ -266,7 +266,7 @@ static int tegra_ictlr_domain_alloc(struct irq_domain *domain, irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, &tegra_ictlr_chip, - info->base[ictlr]); + (void __force *)info->base[ictlr]); } parent_fwspec = *fwspec; diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c index b956dfffe78c..f811a7de5857 100644 --- a/drivers/irqchip/irq-vic.c +++ b/drivers/irqchip/irq-vic.c @@ -167,7 +167,7 @@ static int vic_suspend(void) return 0; } -struct syscore_ops vic_syscore_ops = { +static struct syscore_ops vic_syscore_ops = { .suspend = vic_suspend, .resume = vic_resume, }; @@ -517,7 +517,8 @@ int __init vic_init_cascaded(void __iomem *base, unsigned int parent_irq, EXPORT_SYMBOL_GPL(vic_init_cascaded); #ifdef CONFIG_OF -int __init vic_of_init(struct device_node *node, struct device_node *parent) +static int __init vic_of_init(struct device_node *node, + struct device_node *parent) { void __iomem *regs; u32 interrupt_mask = ~0; diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c index a2e0ed6c9a4d..32f34511c416 100644 --- a/drivers/isdn/hardware/eicon/divasmain.c +++ b/drivers/isdn/hardware/eicon/divasmain.c @@ -445,32 +445,32 @@ void divasa_unmap_pci_bar(void __iomem *bar) /********************************************************* ** I/O port access *********************************************************/ -byte __inline__ inpp(void __iomem *addr) +inline byte inpp(void __iomem *addr) { return (inb((unsigned long) addr)); } -word __inline__ inppw(void __iomem *addr) +inline word inppw(void __iomem *addr) { return (inw((unsigned long) addr)); } -void __inline__ inppw_buffer(void __iomem *addr, void *P, int length) +inline void inppw_buffer(void __iomem *addr, void *P, int length) { insw((unsigned long) addr, (word *) P, length >> 1); } -void __inline__ outppw_buffer(void __iomem *addr, void *P, int length) +inline void outppw_buffer(void __iomem *addr, void *P, int length) { outsw((unsigned long) addr, (word *) P, length >> 1); } -void __inline__ outppw(void __iomem *addr, word w) +inline void outppw(void __iomem *addr, word w) { outw(w, (unsigned long) addr); } -void __inline__ outpp(void __iomem *addr, word p) +inline void outpp(void __iomem *addr, word p) { outb(p, (unsigned long) addr); } diff --git a/drivers/isdn/hardware/eicon/platform.h b/drivers/isdn/hardware/eicon/platform.h index b2edb7590dda..62e2073c3690 100644 --- a/drivers/isdn/hardware/eicon/platform.h +++ b/drivers/isdn/hardware/eicon/platform.h @@ -203,7 +203,7 @@ void PCIread(byte bus, byte func, int offset, void *data, int length, void *pci_ /* ** I/O Port utilities */ -int diva_os_register_io_port(void *adapter, int register, unsigned long port, +int diva_os_register_io_port(void *adapter, int reg, unsigned long port, 
unsigned long length, const char *name, int id); /* ** I/O port access abstraction @@ -271,13 +271,13 @@ void diva_os_get_time(dword *sec, dword *usec); ** atomic operation, fake because we use threads */ typedef int diva_os_atomic_t; -static diva_os_atomic_t __inline__ +static inline diva_os_atomic_t diva_os_atomic_increment(diva_os_atomic_t *pv) { *pv += 1; return (*pv); } -static diva_os_atomic_t __inline__ +static inline diva_os_atomic_t diva_os_atomic_decrement(diva_os_atomic_t *pv) { *pv -= 1; diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 85a339030e4b..61c68a1f054a 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -27,11 +27,13 @@ config NVM_DEBUG It is required to create/remove targets without IOCTLs. config NVM_GENNVM - tristate "Generic NVM manager for Open-Channel SSDs" + tristate "General Non-Volatile Memory Manager for Open-Channel SSDs" ---help--- - NVM media manager for Open-Channel SSDs that offload management - functionality to device, while keeping data placement and garbage - collection decisions on the host. + Non-volatile memory media manager for Open-Channel SSDs that implements + physical media metadata management and block provisioning API. + + This is the standard media manager for using Open-Channel SSDs, and + required for targets to be instantiated. config NVM_RRPC tristate "Round-robin Hybrid Open-Channel SSD target" diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 160c1a6838e1..9ebd2cfbd849 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -18,8 +18,6 @@ * */ -#include -#include #include #include #include @@ -28,46 +26,42 @@ #include #include #include -#include static LIST_HEAD(nvm_tgt_types); +static DECLARE_RWSEM(nvm_tgtt_lock); static LIST_HEAD(nvm_mgrs); static LIST_HEAD(nvm_devices); -static LIST_HEAD(nvm_targets); static DECLARE_RWSEM(nvm_lock); -static struct nvm_target *nvm_find_target(const char *name) +struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock) { - struct nvm_target *tgt; + struct nvm_tgt_type *tmp, *tt = NULL; - list_for_each_entry(tgt, &nvm_targets, list) - if (!strcmp(name, tgt->disk->disk_name)) - return tgt; + if (lock) + down_write(&nvm_tgtt_lock); - return NULL; -} - -static struct nvm_tgt_type *nvm_find_target_type(const char *name) -{ - struct nvm_tgt_type *tt; - - list_for_each_entry(tt, &nvm_tgt_types, list) - if (!strcmp(name, tt->name)) - return tt; + list_for_each_entry(tmp, &nvm_tgt_types, list) + if (!strcmp(name, tmp->name)) { + tt = tmp; + break; + } - return NULL; + if (lock) + up_write(&nvm_tgtt_lock); + return tt; } +EXPORT_SYMBOL(nvm_find_target_type); int nvm_register_tgt_type(struct nvm_tgt_type *tt) { int ret = 0; - down_write(&nvm_lock); - if (nvm_find_target_type(tt->name)) + down_write(&nvm_tgtt_lock); + if (nvm_find_target_type(tt->name, 0)) ret = -EEXIST; else list_add(&tt->list, &nvm_tgt_types); - up_write(&nvm_lock); + up_write(&nvm_tgtt_lock); return ret; } @@ -110,7 +104,7 @@ static struct nvmm_type *nvm_find_mgr_type(const char *name) return NULL; } -struct nvmm_type *nvm_init_mgr(struct nvm_dev *dev) +static struct nvmm_type *nvm_init_mgr(struct nvm_dev *dev) { struct nvmm_type *mt; int ret; @@ -182,20 +176,6 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name) return NULL; } -struct nvm_block *nvm_get_blk_unlocked(struct nvm_dev *dev, struct nvm_lun *lun, - unsigned long flags) -{ - return dev->mt->get_blk_unlocked(dev, lun, flags); -} -EXPORT_SYMBOL(nvm_get_blk_unlocked); - -/* Assumes that all 
valid pages have already been moved on release to bm */ -void nvm_put_blk_unlocked(struct nvm_dev *dev, struct nvm_block *blk) -{ - return dev->mt->put_blk_unlocked(dev, blk); -} -EXPORT_SYMBOL(nvm_put_blk_unlocked); - struct nvm_block *nvm_get_blk(struct nvm_dev *dev, struct nvm_lun *lun, unsigned long flags) { @@ -210,6 +190,12 @@ void nvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk) } EXPORT_SYMBOL(nvm_put_blk); +void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type) +{ + return dev->mt->mark_blk(dev, ppa, type); +} +EXPORT_SYMBOL(nvm_mark_blk); + int nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) { return dev->mt->submit_io(dev, rqd); @@ -251,9 +237,10 @@ void nvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd) EXPORT_SYMBOL(nvm_generic_to_addr_mode); int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, - struct ppa_addr *ppas, int nr_ppas, int vblk) + const struct ppa_addr *ppas, int nr_ppas, int vblk) { int i, plane_cnt, pl_idx; + struct ppa_addr ppa; if ((!vblk || dev->plane_mode == NVM_PLANE_SINGLE) && nr_ppas == 1) { rqd->nr_ppas = nr_ppas; @@ -278,8 +265,9 @@ int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, for (i = 0; i < nr_ppas; i++) { for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { - ppas[i].g.pl = pl_idx; - rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppas[i]; + ppa = ppas[i]; + ppa.g.pl = pl_idx; + rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa; } } } @@ -337,7 +325,7 @@ static void nvm_end_io_sync(struct nvm_rq *rqd) complete(waiting); } -int __nvm_submit_ppa(struct nvm_dev *dev, struct nvm_rq *rqd, int opcode, +static int __nvm_submit_ppa(struct nvm_dev *dev, struct nvm_rq *rqd, int opcode, int flags, void *buf, int len) { DECLARE_COMPLETION_ONSTACK(wait); @@ -367,7 +355,9 @@ int __nvm_submit_ppa(struct nvm_dev *dev, struct nvm_rq *rqd, int opcode, /* Prevent hang_check timer from firing at us during very long I/O */ hang_check = sysctl_hung_task_timeout_secs; if (hang_check) - while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2))); + while (!wait_for_completion_io_timeout(&wait, + hang_check * (HZ/2))) + ; else wait_for_completion_io(&wait); @@ -510,7 +500,8 @@ static int nvm_init_mlc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp) /* The lower page table encoding consists of a list of bytes, where each * has a lower and an upper half. 
The first half byte maintains the * increment value and every value after is an offset added to the - * previous incrementation value */ + * previous incrementation value + */ dev->lptbl[0] = mlc->pairs[0] & 0xF; for (i = 1; i < dev->lps_per_blk; i++) { p = mlc->pairs[i >> 1]; @@ -596,42 +587,11 @@ err_fmtype: return ret; } -static void nvm_remove_target(struct nvm_target *t) -{ - struct nvm_tgt_type *tt = t->type; - struct gendisk *tdisk = t->disk; - struct request_queue *q = tdisk->queue; - - lockdep_assert_held(&nvm_lock); - - del_gendisk(tdisk); - blk_cleanup_queue(q); - - if (tt->exit) - tt->exit(tdisk->private_data); - - put_disk(tdisk); - - list_del(&t->list); - kfree(t); -} - static void nvm_free_mgr(struct nvm_dev *dev) { - struct nvm_target *tgt, *tmp; - if (!dev->mt) return; - down_write(&nvm_lock); - list_for_each_entry_safe(tgt, tmp, &nvm_targets, list) { - if (tgt->dev != dev) - continue; - - nvm_remove_target(tgt); - } - up_write(&nvm_lock); - dev->mt->unregister_mgr(dev); dev->mt = NULL; } @@ -778,91 +738,6 @@ void nvm_unregister(char *disk_name) } EXPORT_SYMBOL(nvm_unregister); -static const struct block_device_operations nvm_fops = { - .owner = THIS_MODULE, -}; - -static int nvm_create_target(struct nvm_dev *dev, - struct nvm_ioctl_create *create) -{ - struct nvm_ioctl_create_simple *s = &create->conf.s; - struct request_queue *tqueue; - struct gendisk *tdisk; - struct nvm_tgt_type *tt; - struct nvm_target *t; - void *targetdata; - - if (!dev->mt) { - pr_info("nvm: device has no media manager registered.\n"); - return -ENODEV; - } - - down_write(&nvm_lock); - tt = nvm_find_target_type(create->tgttype); - if (!tt) { - pr_err("nvm: target type %s not found\n", create->tgttype); - up_write(&nvm_lock); - return -EINVAL; - } - - t = nvm_find_target(create->tgtname); - if (t) { - pr_err("nvm: target name already exists.\n"); - up_write(&nvm_lock); - return -EINVAL; - } - up_write(&nvm_lock); - - t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); - if (!t) - return -ENOMEM; - - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); - if (!tqueue) - goto err_t; - blk_queue_make_request(tqueue, tt->make_rq); - - tdisk = alloc_disk(0); - if (!tdisk) - goto err_queue; - - sprintf(tdisk->disk_name, "%s", create->tgtname); - tdisk->flags = GENHD_FL_EXT_DEVT; - tdisk->major = 0; - tdisk->first_minor = 0; - tdisk->fops = &nvm_fops; - tdisk->queue = tqueue; - - targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end); - if (IS_ERR(targetdata)) - goto err_init; - - tdisk->private_data = targetdata; - tqueue->queuedata = targetdata; - - blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect); - - set_capacity(tdisk, tt->capacity(targetdata)); - add_disk(tdisk); - - t->type = tt; - t->disk = tdisk; - t->dev = dev; - - down_write(&nvm_lock); - list_add_tail(&t->list, &nvm_targets); - up_write(&nvm_lock); - - return 0; -err_init: - put_disk(tdisk); -err_queue: - blk_cleanup_queue(tqueue); -err_t: - kfree(t); - return -ENOMEM; -} - static int __nvm_configure_create(struct nvm_ioctl_create *create) { struct nvm_dev *dev; @@ -871,11 +746,17 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create) down_write(&nvm_lock); dev = nvm_find_nvm_dev(create->dev); up_write(&nvm_lock); + if (!dev) { pr_err("nvm: device not found\n"); return -EINVAL; } + if (!dev->mt) { + pr_info("nvm: device has no media manager registered.\n"); + return -ENODEV; + } + if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) { pr_err("nvm: config type not valid\n"); return -EINVAL; @@ -888,25 +769,7 @@ 
static int __nvm_configure_create(struct nvm_ioctl_create *create) return -EINVAL; } - return nvm_create_target(dev, create); -} - -static int __nvm_configure_remove(struct nvm_ioctl_remove *remove) -{ - struct nvm_target *t; - - down_write(&nvm_lock); - t = nvm_find_target(remove->tgtname); - if (!t) { - pr_err("nvm: target \"%s\" doesn't exist.\n", remove->tgtname); - up_write(&nvm_lock); - return -EINVAL; - } - - nvm_remove_target(t); - up_write(&nvm_lock); - - return 0; + return dev->mt->create_tgt(dev, create); } #ifdef CONFIG_NVM_DEBUG @@ -941,8 +804,9 @@ static int nvm_configure_show(const char *val) static int nvm_configure_remove(const char *val) { struct nvm_ioctl_remove remove; + struct nvm_dev *dev; char opcode; - int ret; + int ret = 0; ret = sscanf(val, "%c %256s", &opcode, remove.tgtname); if (ret != 2) { @@ -952,7 +816,13 @@ static int nvm_configure_remove(const char *val) remove.flags = 0; - return __nvm_configure_remove(&remove); + list_for_each_entry(dev, &nvm_devices, devices) { + ret = dev->mt->remove_tgt(dev, &remove); + if (!ret) + break; + } + + return ret; } static int nvm_configure_create(const char *val) @@ -1149,6 +1019,8 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg) static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) { struct nvm_ioctl_remove remove; + struct nvm_dev *dev; + int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1163,7 +1035,13 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) return -EINVAL; } - return __nvm_configure_remove(&remove); + list_for_each_entry(dev, &nvm_devices, devices) { + ret = dev->mt->remove_tgt(dev, &remove); + if (!ret) + break; + } + + return ret; } static void nvm_setup_nvm_sb_info(struct nvm_sb_info *info) diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index ec9fb6876e38..b74174c6d021 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -15,22 +15,160 @@ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, * USA. * - * Implementation of a generic nvm manager for Open-Channel SSDs. + * Implementation of a general nvm manager for Open-Channel SSDs. 
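One behavioural detail of the new removal path shown above: the core no longer keeps its own target list, so both nvm_configure_remove() and nvm_ioctl_dev_remove() walk the registered devices and offer the request to each media manager, stopping at the first one that returns 0. gen_remove_tgt() further down documents the convention: 0 on success, 1 for not-found-here, negative errno on hard failure. A toy model of that claim-and-stop dispatch (hypothetical names only):

#include <stdio.h>
#include <string.h>

struct dev { const char *tgt; };

/* Per-device handler: 0 = removed, 1 = not found here, <0 = error. */
static int remove_tgt(struct dev *d, const char *name)
{
	if (!d->tgt || strcmp(d->tgt, name) != 0)
		return 1;
	d->tgt = NULL;
	return 0;
}

int main(void)
{
	struct dev devs[] = { { "tgt0" }, { "tgt1" } };
	int i, ret = 1;

	for (i = 0; i < 2; i++) {
		ret = remove_tgt(&devs[i], "tgt1");
		if (!ret)
			break;	/* the device that owns the target handled it */
	}
	printf("remove -> %d\n", ret);	/* 0 means some device removed it */
	return 0;
}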
*/ #include "gennvm.h" -static int gennvm_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len) +static struct nvm_target *gen_find_target(struct gen_dev *gn, const char *name) { - struct gen_nvm *gn = dev->mp; - struct gennvm_area *area, *prev, *next; + struct nvm_target *tgt; + + list_for_each_entry(tgt, &gn->targets, list) + if (!strcmp(name, tgt->disk->disk_name)) + return tgt; + + return NULL; +} + +static const struct block_device_operations gen_fops = { + .owner = THIS_MODULE, +}; + +static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) +{ + struct gen_dev *gn = dev->mp; + struct nvm_ioctl_create_simple *s = &create->conf.s; + struct request_queue *tqueue; + struct gendisk *tdisk; + struct nvm_tgt_type *tt; + struct nvm_target *t; + void *targetdata; + + tt = nvm_find_target_type(create->tgttype, 1); + if (!tt) { + pr_err("nvm: target type %s not found\n", create->tgttype); + return -EINVAL; + } + + mutex_lock(&gn->lock); + t = gen_find_target(gn, create->tgtname); + if (t) { + pr_err("nvm: target name already exists.\n"); + mutex_unlock(&gn->lock); + return -EINVAL; + } + mutex_unlock(&gn->lock); + + t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); + if (!t) + return -ENOMEM; + + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); + if (!tqueue) + goto err_t; + blk_queue_make_request(tqueue, tt->make_rq); + + tdisk = alloc_disk(0); + if (!tdisk) + goto err_queue; + + sprintf(tdisk->disk_name, "%s", create->tgtname); + tdisk->flags = GENHD_FL_EXT_DEVT; + tdisk->major = 0; + tdisk->first_minor = 0; + tdisk->fops = &gen_fops; + tdisk->queue = tqueue; + + targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end); + if (IS_ERR(targetdata)) + goto err_init; + + tdisk->private_data = targetdata; + tqueue->queuedata = targetdata; + + blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect); + + set_capacity(tdisk, tt->capacity(targetdata)); + add_disk(tdisk); + + t->type = tt; + t->disk = tdisk; + t->dev = dev; + + mutex_lock(&gn->lock); + list_add_tail(&t->list, &gn->targets); + mutex_unlock(&gn->lock); + + return 0; +err_init: + put_disk(tdisk); +err_queue: + blk_cleanup_queue(tqueue); +err_t: + kfree(t); + return -ENOMEM; +} + +static void __gen_remove_target(struct nvm_target *t) +{ + struct nvm_tgt_type *tt = t->type; + struct gendisk *tdisk = t->disk; + struct request_queue *q = tdisk->queue; + + del_gendisk(tdisk); + blk_cleanup_queue(q); + + if (tt->exit) + tt->exit(tdisk->private_data); + + put_disk(tdisk); + + list_del(&t->list); + kfree(t); +} + +/** + * gen_remove_tgt - Removes a target from the media manager + * @dev: device + * @remove: ioctl structure with target name to remove. 
+ * + * Returns: + * 0: on success + * 1: on not found + * <0: on error + */ +static int gen_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) +{ + struct gen_dev *gn = dev->mp; + struct nvm_target *t; + + if (!gn) + return 1; + + mutex_lock(&gn->lock); + t = gen_find_target(gn, remove->tgtname); + if (!t) { + mutex_unlock(&gn->lock); + return 1; + } + __gen_remove_target(t); + mutex_unlock(&gn->lock); + + return 0; +} + +static int gen_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len) +{ + struct gen_dev *gn = dev->mp; + struct gen_area *area, *prev, *next; sector_t begin = 0; sector_t max_sectors = (dev->sec_size * dev->total_secs) >> 9; if (len > max_sectors) return -EINVAL; - area = kmalloc(sizeof(struct gennvm_area), GFP_KERNEL); + area = kmalloc(sizeof(struct gen_area), GFP_KERNEL); if (!area) return -ENOMEM; @@ -64,10 +202,10 @@ static int gennvm_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len) return 0; } -static void gennvm_put_area(struct nvm_dev *dev, sector_t begin) +static void gen_put_area(struct nvm_dev *dev, sector_t begin) { - struct gen_nvm *gn = dev->mp; - struct gennvm_area *area; + struct gen_dev *gn = dev->mp; + struct gen_area *area; spin_lock(&dev->lock); list_for_each_entry(area, &gn->area_list, list) { @@ -82,27 +220,27 @@ static void gennvm_put_area(struct nvm_dev *dev, sector_t begin) spin_unlock(&dev->lock); } -static void gennvm_blocks_free(struct nvm_dev *dev) +static void gen_blocks_free(struct nvm_dev *dev) { - struct gen_nvm *gn = dev->mp; + struct gen_dev *gn = dev->mp; struct gen_lun *lun; int i; - gennvm_for_each_lun(gn, lun, i) { + gen_for_each_lun(gn, lun, i) { if (!lun->vlun.blocks) break; vfree(lun->vlun.blocks); } } -static void gennvm_luns_free(struct nvm_dev *dev) +static void gen_luns_free(struct nvm_dev *dev) { - struct gen_nvm *gn = dev->mp; + struct gen_dev *gn = dev->mp; kfree(gn->luns); } -static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn) +static int gen_luns_init(struct nvm_dev *dev, struct gen_dev *gn) { struct gen_lun *lun; int i; @@ -111,7 +249,7 @@ static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn) if (!gn->luns) return -ENOMEM; - gennvm_for_each_lun(gn, lun, i) { + gen_for_each_lun(gn, lun, i) { spin_lock_init(&lun->vlun.lock); INIT_LIST_HEAD(&lun->free_list); INIT_LIST_HEAD(&lun->used_list); @@ -122,14 +260,11 @@ static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn) lun->vlun.lun_id = i % dev->luns_per_chnl; lun->vlun.chnl_id = i / dev->luns_per_chnl; lun->vlun.nr_free_blocks = dev->blks_per_lun; - lun->vlun.nr_open_blocks = 0; - lun->vlun.nr_closed_blocks = 0; - lun->vlun.nr_bad_blocks = 0; } return 0; } -static int gennvm_block_bb(struct gen_nvm *gn, struct ppa_addr ppa, +static int gen_block_bb(struct gen_dev *gn, struct ppa_addr ppa, u8 *blks, int nr_blks) { struct nvm_dev *dev = gn->dev; @@ -149,17 +284,16 @@ static int gennvm_block_bb(struct gen_nvm *gn, struct ppa_addr ppa, blk = &lun->vlun.blocks[i]; list_move_tail(&blk->list, &lun->bb_list); - lun->vlun.nr_bad_blocks++; lun->vlun.nr_free_blocks--; } return 0; } -static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) +static int gen_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) { struct nvm_dev *dev = private; - struct gen_nvm *gn = dev->mp; + struct gen_dev *gn = dev->mp; u64 elba = slba + nlb; struct gen_lun *lun; struct nvm_block *blk; @@ -167,7 +301,7 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) int lun_id; if 
(unlikely(elba > dev->total_secs)) { - pr_err("gennvm: L2P data from device is out of bounds!\n"); + pr_err("gen: L2P data from device is out of bounds!\n"); return -EINVAL; } @@ -175,7 +309,7 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) u64 pba = le64_to_cpu(entries[i]); if (unlikely(pba >= dev->total_secs && pba != U64_MAX)) { - pr_err("gennvm: L2P data entry is out of bounds!\n"); + pr_err("gen: L2P data entry is out of bounds!\n"); return -EINVAL; } @@ -200,16 +334,15 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) * block state. The block is assumed to be open. */ list_move_tail(&blk->list, &lun->used_list); - blk->state = NVM_BLK_ST_OPEN; + blk->state = NVM_BLK_ST_TGT; lun->vlun.nr_free_blocks--; - lun->vlun.nr_open_blocks++; } } return 0; } -static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) +static int gen_blocks_init(struct nvm_dev *dev, struct gen_dev *gn) { struct gen_lun *lun; struct nvm_block *block; @@ -222,7 +355,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) if (!blks) return -ENOMEM; - gennvm_for_each_lun(gn, lun, lun_iter) { + gen_for_each_lun(gn, lun, lun_iter) { lun->vlun.blocks = vzalloc(sizeof(struct nvm_block) * dev->blks_per_lun); if (!lun->vlun.blocks) { @@ -256,20 +389,20 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) ret = nvm_get_bb_tbl(dev, ppa, blks); if (ret) - pr_err("gennvm: could not get BB table\n"); + pr_err("gen: could not get BB table\n"); - ret = gennvm_block_bb(gn, ppa, blks, nr_blks); + ret = gen_block_bb(gn, ppa, blks, nr_blks); if (ret) - pr_err("gennvm: BB table map failed\n"); + pr_err("gen: BB table map failed\n"); } } if ((dev->identity.dom & NVM_RSP_L2P) && dev->ops->get_l2p_tbl) { ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs, - gennvm_block_map, dev); + gen_block_map, dev); if (ret) { - pr_err("gennvm: could not read L2P table.\n"); - pr_warn("gennvm: default block initialization"); + pr_err("gen: could not read L2P table.\n"); + pr_warn("gen: default block initialization"); } } @@ -277,67 +410,79 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) return 0; } -static void gennvm_free(struct nvm_dev *dev) +static void gen_free(struct nvm_dev *dev) { - gennvm_blocks_free(dev); - gennvm_luns_free(dev); + gen_blocks_free(dev); + gen_luns_free(dev); kfree(dev->mp); dev->mp = NULL; } -static int gennvm_register(struct nvm_dev *dev) +static int gen_register(struct nvm_dev *dev) { - struct gen_nvm *gn; + struct gen_dev *gn; int ret; if (!try_module_get(THIS_MODULE)) return -ENODEV; - gn = kzalloc(sizeof(struct gen_nvm), GFP_KERNEL); + gn = kzalloc(sizeof(struct gen_dev), GFP_KERNEL); if (!gn) return -ENOMEM; gn->dev = dev; gn->nr_luns = dev->nr_luns; INIT_LIST_HEAD(&gn->area_list); + mutex_init(&gn->lock); + INIT_LIST_HEAD(&gn->targets); dev->mp = gn; - ret = gennvm_luns_init(dev, gn); + ret = gen_luns_init(dev, gn); if (ret) { - pr_err("gennvm: could not initialize luns\n"); + pr_err("gen: could not initialize luns\n"); goto err; } - ret = gennvm_blocks_init(dev, gn); + ret = gen_blocks_init(dev, gn); if (ret) { - pr_err("gennvm: could not initialize blocks\n"); + pr_err("gen: could not initialize blocks\n"); goto err; } return 1; err: - gennvm_free(dev); + gen_free(dev); module_put(THIS_MODULE); return ret; } -static void gennvm_unregister(struct nvm_dev *dev) +static void gen_unregister(struct nvm_dev *dev) { - gennvm_free(dev); + struct gen_dev *gn = dev->mp; + struct 
nvm_target *t, *tmp; + + mutex_lock(&gn->lock); + list_for_each_entry_safe(t, tmp, &gn->targets, list) { + if (t->dev != dev) + continue; + __gen_remove_target(t); + } + mutex_unlock(&gn->lock); + + gen_free(dev); module_put(THIS_MODULE); } -static struct nvm_block *gennvm_get_blk_unlocked(struct nvm_dev *dev, +static struct nvm_block *gen_get_blk(struct nvm_dev *dev, struct nvm_lun *vlun, unsigned long flags) { struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun); struct nvm_block *blk = NULL; int is_gc = flags & NVM_IOTYPE_GC; - assert_spin_locked(&vlun->lock); - + spin_lock(&vlun->lock); if (list_empty(&lun->free_list)) { - pr_err_ratelimited("gennvm: lun %u have no free pages available", + pr_err_ratelimited("gen: lun %u have no free pages available", lun->vlun.id); goto out; } @@ -346,88 +491,58 @@ static struct nvm_block *gennvm_get_blk_unlocked(struct nvm_dev *dev, goto out; blk = list_first_entry(&lun->free_list, struct nvm_block, list); - list_move_tail(&blk->list, &lun->used_list); - blk->state = NVM_BLK_ST_OPEN; + list_move_tail(&blk->list, &lun->used_list); + blk->state = NVM_BLK_ST_TGT; lun->vlun.nr_free_blocks--; - lun->vlun.nr_open_blocks++; - out: - return blk; -} - -static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev, - struct nvm_lun *vlun, unsigned long flags) -{ - struct nvm_block *blk; - - spin_lock(&vlun->lock); - blk = gennvm_get_blk_unlocked(dev, vlun, flags); spin_unlock(&vlun->lock); return blk; } -static void gennvm_put_blk_unlocked(struct nvm_dev *dev, struct nvm_block *blk) +static void gen_put_blk(struct nvm_dev *dev, struct nvm_block *blk) { struct nvm_lun *vlun = blk->lun; struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun); - assert_spin_locked(&vlun->lock); - - if (blk->state & NVM_BLK_ST_OPEN) { - list_move_tail(&blk->list, &lun->free_list); - lun->vlun.nr_open_blocks--; - lun->vlun.nr_free_blocks++; - blk->state = NVM_BLK_ST_FREE; - } else if (blk->state & NVM_BLK_ST_CLOSED) { + spin_lock(&vlun->lock); + if (blk->state & NVM_BLK_ST_TGT) { list_move_tail(&blk->list, &lun->free_list); - lun->vlun.nr_closed_blocks--; lun->vlun.nr_free_blocks++; blk->state = NVM_BLK_ST_FREE; } else if (blk->state & NVM_BLK_ST_BAD) { list_move_tail(&blk->list, &lun->bb_list); - lun->vlun.nr_bad_blocks++; blk->state = NVM_BLK_ST_BAD; } else { WARN_ON_ONCE(1); - pr_err("gennvm: erroneous block type (%lu -> %u)\n", + pr_err("gen: erroneous block type (%lu -> %u)\n", blk->id, blk->state); list_move_tail(&blk->list, &lun->bb_list); - lun->vlun.nr_bad_blocks++; - blk->state = NVM_BLK_ST_BAD; } -} - -static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk) -{ - struct nvm_lun *vlun = blk->lun; - - spin_lock(&vlun->lock); - gennvm_put_blk_unlocked(dev, blk); spin_unlock(&vlun->lock); } -static void gennvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type) +static void gen_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type) { - struct gen_nvm *gn = dev->mp; + struct gen_dev *gn = dev->mp; struct gen_lun *lun; struct nvm_block *blk; - pr_debug("gennvm: ppa (ch: %u lun: %u blk: %u pg: %u) -> %u\n", + pr_debug("gen: ppa (ch: %u lun: %u blk: %u pg: %u) -> %u\n", ppa.g.ch, ppa.g.lun, ppa.g.blk, ppa.g.pg, type); if (unlikely(ppa.g.ch > dev->nr_chnls || ppa.g.lun > dev->luns_per_chnl || ppa.g.blk > dev->blks_per_lun)) { WARN_ON_ONCE(1); - pr_err("gennvm: ppa broken (ch: %u > %u lun: %u > %u blk: %u > %u", + pr_err("gen: ppa broken (ch: %u > %u lun: %u > %u blk: %u > %u", ppa.g.ch, dev->nr_chnls, ppa.g.lun, dev->luns_per_chnl, 
ppa.g.blk, dev->blks_per_lun); return; } - lun = &gn->luns[ppa.g.lun * ppa.g.ch]; + lun = &gn->luns[(dev->luns_per_chnl * ppa.g.ch) + ppa.g.lun]; blk = &lun->vlun.blocks[ppa.g.blk]; /* will be moved to bb list on put_blk from target */ @@ -435,9 +550,9 @@ static void gennvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type) } /* - * mark block bad in gennvm. It is expected that the target recovers separately + * mark block bad in gen. It is expected that the target recovers separately */ -static void gennvm_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd) +static void gen_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd) { int bit = -1; int max_secs = dev->ops->max_phys_sect; @@ -447,25 +562,25 @@ static void gennvm_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd) /* look up blocks and mark them as bad */ if (rqd->nr_ppas == 1) { - gennvm_mark_blk(dev, rqd->ppa_addr, NVM_BLK_ST_BAD); + gen_mark_blk(dev, rqd->ppa_addr, NVM_BLK_ST_BAD); return; } while ((bit = find_next_bit(comp_bits, max_secs, bit + 1)) < max_secs) - gennvm_mark_blk(dev, rqd->ppa_list[bit], NVM_BLK_ST_BAD); + gen_mark_blk(dev, rqd->ppa_list[bit], NVM_BLK_ST_BAD); } -static void gennvm_end_io(struct nvm_rq *rqd) +static void gen_end_io(struct nvm_rq *rqd) { struct nvm_tgt_instance *ins = rqd->ins; if (rqd->error == NVM_RSP_ERR_FAILWRITE) - gennvm_mark_blk_bad(rqd->dev, rqd); + gen_mark_blk_bad(rqd->dev, rqd); ins->tt->end_io(rqd); } -static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) +static int gen_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) { if (!dev->ops->submit_io) return -ENODEV; @@ -474,11 +589,11 @@ static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) nvm_generic_to_addr_mode(dev, rqd); rqd->dev = dev; - rqd->end_io = gennvm_end_io; + rqd->end_io = gen_end_io; return dev->ops->submit_io(dev, rqd); } -static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, +static int gen_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, unsigned long flags) { struct ppa_addr addr = block_to_ppa(dev, blk); @@ -486,19 +601,19 @@ static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, return nvm_erase_ppa(dev, &addr, 1); } -static int gennvm_reserve_lun(struct nvm_dev *dev, int lunid) +static int gen_reserve_lun(struct nvm_dev *dev, int lunid) { return test_and_set_bit(lunid, dev->lun_map); } -static void gennvm_release_lun(struct nvm_dev *dev, int lunid) +static void gen_release_lun(struct nvm_dev *dev, int lunid) { WARN_ON(!test_and_clear_bit(lunid, dev->lun_map)); } -static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid) +static struct nvm_lun *gen_get_lun(struct nvm_dev *dev, int lunid) { - struct gen_nvm *gn = dev->mp; + struct gen_dev *gn = dev->mp; if (unlikely(lunid >= dev->nr_luns)) return NULL; @@ -506,66 +621,62 @@ static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid) return &gn->luns[lunid].vlun; } -static void gennvm_lun_info_print(struct nvm_dev *dev) +static void gen_lun_info_print(struct nvm_dev *dev) { - struct gen_nvm *gn = dev->mp; + struct gen_dev *gn = dev->mp; struct gen_lun *lun; unsigned int i; - gennvm_for_each_lun(gn, lun, i) { + gen_for_each_lun(gn, lun, i) { spin_lock(&lun->vlun.lock); - pr_info("%s: lun%8u\t%u\t%u\t%u\t%u\n", - dev->name, i, - lun->vlun.nr_free_blocks, - lun->vlun.nr_open_blocks, - lun->vlun.nr_closed_blocks, - lun->vlun.nr_bad_blocks); + pr_info("%s: lun%8u\t%u\n", dev->name, i, + lun->vlun.nr_free_blocks); spin_unlock(&lun->vlun.lock); } } -static struct 
nvmm_type gennvm = { +static struct nvmm_type gen = { .name = "gennvm", .version = {0, 1, 0}, - .register_mgr = gennvm_register, - .unregister_mgr = gennvm_unregister, + .register_mgr = gen_register, + .unregister_mgr = gen_unregister, - .get_blk_unlocked = gennvm_get_blk_unlocked, - .put_blk_unlocked = gennvm_put_blk_unlocked, + .create_tgt = gen_create_tgt, + .remove_tgt = gen_remove_tgt, - .get_blk = gennvm_get_blk, - .put_blk = gennvm_put_blk, + .get_blk = gen_get_blk, + .put_blk = gen_put_blk, - .submit_io = gennvm_submit_io, - .erase_blk = gennvm_erase_blk, + .submit_io = gen_submit_io, + .erase_blk = gen_erase_blk, - .mark_blk = gennvm_mark_blk, + .mark_blk = gen_mark_blk, - .get_lun = gennvm_get_lun, - .reserve_lun = gennvm_reserve_lun, - .release_lun = gennvm_release_lun, - .lun_info_print = gennvm_lun_info_print, + .get_lun = gen_get_lun, + .reserve_lun = gen_reserve_lun, + .release_lun = gen_release_lun, + .lun_info_print = gen_lun_info_print, - .get_area = gennvm_get_area, - .put_area = gennvm_put_area, + .get_area = gen_get_area, + .put_area = gen_put_area, }; -static int __init gennvm_module_init(void) +static int __init gen_module_init(void) { - return nvm_register_mgr(&gennvm); + return nvm_register_mgr(&gen); } -static void gennvm_module_exit(void) +static void gen_module_exit(void) { - nvm_unregister_mgr(&gennvm); + nvm_unregister_mgr(&gen); } -module_init(gennvm_module_init); -module_exit(gennvm_module_exit); +module_init(gen_module_init); +module_exit(gen_module_exit); MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("Generic media manager for Open-Channel SSDs"); +MODULE_DESCRIPTION("General media manager for Open-Channel SSDs"); diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h index 04d7c23cfc61..8ecfa817d21d 100644 --- a/drivers/lightnvm/gennvm.h +++ b/drivers/lightnvm/gennvm.h @@ -34,20 +34,24 @@ struct gen_lun { */ }; -struct gen_nvm { +struct gen_dev { struct nvm_dev *dev; int nr_luns; struct gen_lun *luns; struct list_head area_list; + + struct mutex lock; + struct list_head targets; }; -struct gennvm_area { +struct gen_area { struct list_head list; sector_t begin; sector_t end; /* end is excluded */ }; -#define gennvm_for_each_lun(bm, lun, i) \ + +#define gen_for_each_lun(bm, lun, i) \ for ((i) = 0, lun = &(bm)->luns[0]; \ (i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)]) diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 2103e97a974f..37fcaadbf80c 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -48,7 +48,7 @@ static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a) } static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba, - unsigned len) + unsigned int len) { sector_t i; @@ -96,10 +96,13 @@ static void rrpc_discard(struct rrpc *rrpc, struct bio *bio) sector_t len = bio->bi_iter.bi_size / RRPC_EXPOSED_PAGE_SIZE; struct nvm_rq *rqd; - do { + while (1) { rqd = rrpc_inflight_laddr_acquire(rrpc, slba, len); + if (rqd) + break; + schedule(); - } while (!rqd); + } if (IS_ERR(rqd)) { pr_err("rrpc: unable to acquire inflight IO\n"); @@ -172,39 +175,32 @@ static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, u64 addr) } /* requires lun->lock taken */ -static void rrpc_set_lun_cur(struct rrpc_lun *rlun, struct rrpc_block *rblk) +static void rrpc_set_lun_cur(struct rrpc_lun *rlun, struct rrpc_block *new_rblk, + struct rrpc_block **cur_rblk) { struct rrpc *rrpc = rlun->rrpc; - BUG_ON(!rblk); - - if (rlun->cur) { - spin_lock(&rlun->cur->lock); - WARN_ON(!block_is_full(rrpc, 
rlun->cur)); - spin_unlock(&rlun->cur->lock); + if (*cur_rblk) { + spin_lock(&(*cur_rblk)->lock); + WARN_ON(!block_is_full(rrpc, *cur_rblk)); + spin_unlock(&(*cur_rblk)->lock); } - rlun->cur = rblk; + *cur_rblk = new_rblk; } static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun, unsigned long flags) { - struct nvm_lun *lun = rlun->parent; struct nvm_block *blk; struct rrpc_block *rblk; - spin_lock(&lun->lock); - blk = nvm_get_blk_unlocked(rrpc->dev, rlun->parent, flags); + blk = nvm_get_blk(rrpc->dev, rlun->parent, flags); if (!blk) { pr_err("nvm: rrpc: cannot get new block from media manager\n"); - spin_unlock(&lun->lock); return NULL; } rblk = rrpc_get_rblk(rlun, blk->id); - list_add_tail(&rblk->list, &rlun->open_list); - spin_unlock(&lun->lock); - blk->priv = rblk; bitmap_zero(rblk->invalid_pages, rrpc->dev->sec_per_blk); rblk->next_page = 0; @@ -216,13 +212,7 @@ static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun, static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk) { - struct rrpc_lun *rlun = rblk->rlun; - struct nvm_lun *lun = rlun->parent; - - spin_lock(&lun->lock); - nvm_put_blk_unlocked(rrpc->dev, rblk->parent); - list_del(&rblk->list); - spin_unlock(&lun->lock); + nvm_put_blk(rrpc->dev, rblk->parent); } static void rrpc_put_blks(struct rrpc *rrpc) @@ -342,7 +332,7 @@ try: /* Perform read to do GC */ bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr); - bio->bi_rw = READ; + bio_set_op_attrs(bio, REQ_OP_READ, 0); bio->bi_private = &wait; bio->bi_end_io = rrpc_end_sync_bio; @@ -364,7 +354,7 @@ try: reinit_completion(&wait); bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr); - bio->bi_rw = WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_private = &wait; bio->bi_end_io = rrpc_end_sync_bio; @@ -508,21 +498,11 @@ static void rrpc_gc_queue(struct work_struct *work) struct rrpc *rrpc = gcb->rrpc; struct rrpc_block *rblk = gcb->rblk; struct rrpc_lun *rlun = rblk->rlun; - struct nvm_lun *lun = rblk->parent->lun; - struct nvm_block *blk = rblk->parent; spin_lock(&rlun->lock); list_add_tail(&rblk->prio, &rlun->prio_list); spin_unlock(&rlun->lock); - spin_lock(&lun->lock); - lun->nr_open_blocks--; - lun->nr_closed_blocks++; - blk->state &= ~NVM_BLK_ST_OPEN; - blk->state |= NVM_BLK_ST_CLOSED; - list_move_tail(&rblk->list, &rlun->closed_list); - spin_unlock(&lun->lock); - mempool_free(gcb, rrpc->gcb_pool); pr_debug("nvm: block '%lu' is full, allow GC (sched)\n", rblk->parent->id); @@ -596,21 +576,20 @@ out: return addr; } -/* Simple round-robin Logical to physical address translation. - * - * Retrieve the mapping using the active append point. Then update the ap for - * the next write to the disk. +/* Map logical address to a physical page. The mapping implements a round robin + * approach and allocates a page from the next lun available. * - * Returns rrpc_addr with the physical address and block. Remember to return to - * rrpc->addr_cache when request is finished. + * Returns rrpc_addr with the physical address and block. Returns NULL if no + * blocks in the next rlun are available. 
*/ static struct rrpc_addr *rrpc_map_page(struct rrpc *rrpc, sector_t laddr, int is_gc) { struct rrpc_lun *rlun; - struct rrpc_block *rblk; + struct rrpc_block *rblk, **cur_rblk; struct nvm_lun *lun; u64 paddr; + int gc_force = 0; rlun = rrpc_get_lun_rr(rrpc, is_gc); lun = rlun->parent; @@ -618,41 +597,65 @@ static struct rrpc_addr *rrpc_map_page(struct rrpc *rrpc, sector_t laddr, if (!is_gc && lun->nr_free_blocks < rrpc->nr_luns * 4) return NULL; - spin_lock(&rlun->lock); + /* + * page allocation steps: + * 1. Try to allocate new page from current rblk + * 2a. If succeed, proceed to map it in and return + * 2b. If fail, first try to allocate a new block from media manager, + * and then retry step 1. Retry until the normal block pool is + * exhausted. + * 3. If exhausted, and garbage collector is requesting the block, + * go to the reserved block and retry step 1. + * In the case that this fails as well, or it is not GC + * requesting, report not able to retrieve a block and let the + * caller handle further processing. + */ + spin_lock(&rlun->lock); + cur_rblk = &rlun->cur; rblk = rlun->cur; retry: paddr = rrpc_alloc_addr(rrpc, rblk); - if (paddr == ADDR_EMPTY) { - rblk = rrpc_get_blk(rrpc, rlun, 0); - if (rblk) { - rrpc_set_lun_cur(rlun, rblk); - goto retry; - } + if (paddr != ADDR_EMPTY) + goto done; - if (is_gc) { - /* retry from emergency gc block */ - paddr = rrpc_alloc_addr(rrpc, rlun->gc_cur); - if (paddr == ADDR_EMPTY) { - rblk = rrpc_get_blk(rrpc, rlun, 1); - if (!rblk) { - pr_err("rrpc: no more blocks"); - goto err; - } - - rlun->gc_cur = rblk; - paddr = rrpc_alloc_addr(rrpc, rlun->gc_cur); - } - rblk = rlun->gc_cur; - } + if (!list_empty(&rlun->wblk_list)) { +new_blk: + rblk = list_first_entry(&rlun->wblk_list, struct rrpc_block, + prio); + rrpc_set_lun_cur(rlun, rblk, cur_rblk); + list_del(&rblk->prio); + goto retry; + } + spin_unlock(&rlun->lock); + + rblk = rrpc_get_blk(rrpc, rlun, gc_force); + if (rblk) { + spin_lock(&rlun->lock); + list_add_tail(&rblk->prio, &rlun->wblk_list); + /* + * another thread might already have added a new block; + * therefore, make sure that one is used, instead of the + * one just added.
+ */ + goto new_blk; } + if (unlikely(is_gc) && !gc_force) { + /* retry from emergency gc block */ + cur_rblk = &rlun->gc_cur; + rblk = rlun->gc_cur; + gc_force = 1; + spin_lock(&rlun->lock); + goto retry; + } + + pr_err("rrpc: failed to allocate new block\n"); + return NULL; +done: spin_unlock(&rlun->lock); return rrpc_update_map(rrpc, laddr, rblk, paddr); -err: - spin_unlock(&rlun->lock); - return NULL; } static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk) @@ -850,14 +853,14 @@ static int rrpc_setup_rq(struct rrpc *rrpc, struct bio *bio, return NVM_IO_ERR; } - if (bio_rw(bio) == WRITE) + if (bio_op(bio) == REQ_OP_WRITE) return rrpc_write_ppalist_rq(rrpc, bio, rqd, flags, npages); return rrpc_read_ppalist_rq(rrpc, bio, rqd, flags, npages); } - if (bio_rw(bio) == WRITE) + if (bio_op(bio) == REQ_OP_WRITE) return rrpc_write_rq(rrpc, bio, rqd, flags); return rrpc_read_rq(rrpc, bio, rqd, flags); @@ -908,7 +911,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio) struct nvm_rq *rqd; int err; - if (bio->bi_rw & REQ_DISCARD) { + if (bio_op(bio) == REQ_OP_DISCARD) { rrpc_discard(rrpc, bio); return BLK_QC_T_NONE; } @@ -1196,8 +1199,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) rlun->rrpc = rrpc; INIT_LIST_HEAD(&rlun->prio_list); - INIT_LIST_HEAD(&rlun->open_list); - INIT_LIST_HEAD(&rlun->closed_list); + INIT_LIST_HEAD(&rlun->wblk_list); INIT_WORK(&rlun->ws_gc, rrpc_lun_gc); spin_lock_init(&rlun->lock); @@ -1338,14 +1340,13 @@ static int rrpc_luns_configure(struct rrpc *rrpc) rblk = rrpc_get_blk(rrpc, rlun, 0); if (!rblk) goto err; - - rrpc_set_lun_cur(rlun, rblk); + rrpc_set_lun_cur(rlun, rblk, &rlun->cur); /* Emergency gc block */ rblk = rrpc_get_blk(rrpc, rlun, 1); if (!rblk) goto err; - rlun->gc_cur = rblk; + rrpc_set_lun_cur(rlun, rblk, &rlun->gc_cur); } return 0; diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h index 87e84b5fc1cc..5e87d52cb983 100644 --- a/drivers/lightnvm/rrpc.h +++ b/drivers/lightnvm/rrpc.h @@ -56,7 +56,6 @@ struct rrpc_block { struct nvm_block *parent; struct rrpc_lun *rlun; struct list_head prio; - struct list_head list; #define MAX_INVALID_PAGES_STORAGE 8 /* Bitmap for invalid page intries */ @@ -77,13 +76,7 @@ struct rrpc_lun { struct rrpc_block *blocks; /* Reference to block allocation */ struct list_head prio_list; /* Blocks that may be GC'ed */ - struct list_head open_list; /* In-use open blocks. These are blocks - * that can be both written to and read - * from - */ - struct list_head closed_list; /* In-use closed blocks. 
These are - * blocks that can _only_ be read from - */ + struct list_head wblk_list; /* Queued blocks to be written to */ struct work_struct ws_gc; @@ -188,7 +181,7 @@ static inline int request_intersects(struct rrpc_inflight_rq *r, } static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr, - unsigned pages, struct rrpc_inflight_rq *r) + unsigned int pages, struct rrpc_inflight_rq *r) { sector_t laddr_end = laddr + pages - 1; struct rrpc_inflight_rq *rtmp; @@ -213,7 +206,7 @@ static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr, } static inline int rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr, - unsigned pages, + unsigned int pages, struct rrpc_inflight_rq *r) { BUG_ON((laddr + pages) > rrpc->nr_sects); diff --git a/drivers/lightnvm/sysblk.c b/drivers/lightnvm/sysblk.c index 994697ac786e..a75bd28aaca3 100644 --- a/drivers/lightnvm/sysblk.c +++ b/drivers/lightnvm/sysblk.c @@ -39,7 +39,8 @@ static inline int scan_ppa_idx(int row, int blkid) return (row * MAX_BLKS_PR_SYSBLK) + blkid; } -void nvm_sysblk_to_cpu(struct nvm_sb_info *info, struct nvm_system_block *sb) +static void nvm_sysblk_to_cpu(struct nvm_sb_info *info, + struct nvm_system_block *sb) { info->seqnr = be32_to_cpu(sb->seqnr); info->erase_cnt = be32_to_cpu(sb->erase_cnt); @@ -48,7 +49,8 @@ void nvm_sysblk_to_cpu(struct nvm_sb_info *info, struct nvm_system_block *sb) info->fs_ppa.ppa = be64_to_cpu(sb->fs_ppa); } -void nvm_cpu_to_sysblk(struct nvm_system_block *sb, struct nvm_sb_info *info) +static void nvm_cpu_to_sysblk(struct nvm_system_block *sb, + struct nvm_sb_info *info) { sb->magic = cpu_to_be32(NVM_SYSBLK_MAGIC); sb->seqnr = cpu_to_be32(info->seqnr); @@ -86,7 +88,7 @@ static int nvm_setup_sysblks(struct nvm_dev *dev, struct ppa_addr *sysblk_ppas) return nr_rows; } -void nvm_setup_sysblk_scan(struct nvm_dev *dev, struct sysblk_scan *s, +static void nvm_setup_sysblk_scan(struct nvm_dev *dev, struct sysblk_scan *s, struct ppa_addr *sysblk_ppas) { memset(s, 0, sizeof(struct sysblk_scan)); diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 52ba8dd82821..3cbda1af87a0 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -3,7 +3,8 @@ # dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ - dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o + dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \ + dm-rq.o dm-multipath-y += dm-path-selector.o dm-mpath.o dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ dm-snap-persistent.o diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index eab505ee0027..76f7534d1dd1 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -294,10 +294,10 @@ static void bch_btree_node_read(struct btree *b) closure_init_stack(&cl); bio = bch_bbio_alloc(b->c); - bio->bi_rw = REQ_META|READ_SYNC; bio->bi_iter.bi_size = KEY_SIZE(&b->key) << 9; bio->bi_end_io = btree_node_read_endio; bio->bi_private = &cl; + bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC); bch_bio_map(bio, b->keys.set[0].data); @@ -396,8 +396,8 @@ static void do_btree_node_write(struct btree *b) b->bio->bi_end_io = btree_node_write_endio; b->bio->bi_private = cl; - b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c)); + bio_set_op_attrs(b->bio, REQ_OP_WRITE, REQ_META|WRITE_SYNC|REQ_FUA); bch_bio_map(b->bio, i); /* diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index 9eaf1d6e8302..864e673aec39 100644 --- a/drivers/md/bcache/closure.c +++ 
b/drivers/md/bcache/closure.c @@ -112,7 +112,7 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) EXPORT_SYMBOL(closure_wait); /** - * closure_sync - sleep until a closure a closure has nothing left to wait on + * closure_sync - sleep until a closure has nothing left to wait on * * Sleeps until the refcount hits 1 - the thread that's running the closure owns * the last refcount. diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 782cc2c8a185..9b2fe2d3e3a9 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -31,7 +31,8 @@ * passing it, as you might expect, the function to run when nothing is pending * and the workqueue to run that function out of. * - * continue_at() also, critically, is a macro that returns the calling function. + * continue_at() also, critically, requires a 'return' immediately following the + * location where this macro is referenced, to return to the calling function. * There's good reason for this. * * To use safely closures asynchronously, they must always have a refcount while diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 8b1f1d5c1819..c28df164701e 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -52,9 +52,10 @@ void bch_btree_verify(struct btree *b) bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev; bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0); bio->bi_iter.bi_size = KEY_SIZE(&v->key) << 9; + bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC); bch_bio_map(bio, sorted); - submit_bio_wait(REQ_META|READ_SYNC, bio); + submit_bio_wait(bio); bch_bbio_free(bio, b->c); memcpy(ondisk, sorted, KEY_SIZE(&v->key) << 9); @@ -113,11 +114,12 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) check = bio_clone(bio, GFP_NOIO); if (!check) return; + bio_set_op_attrs(check, REQ_OP_READ, READ_SYNC); if (bio_alloc_pages(check, GFP_NOIO)) goto out_put; - submit_bio_wait(READ_SYNC, check); + submit_bio_wait(check); bio_for_each_segment(bv, bio, iter) { void *p1 = kmap_atomic(bv.bv_page); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 86a0bb87124e..e97b0acf7b8d 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -25,7 +25,6 @@ struct bio *bch_bbio_alloc(struct cache_set *c) struct bio *bio = &b->bio; bio_init(bio); - bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; bio->bi_max_vecs = bucket_pages(c); bio->bi_io_vec = bio->bi_inline_vecs; @@ -111,7 +110,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio, struct bbio *b = container_of(bio, struct bbio, bio); struct cache *ca = PTR_CACHE(c, &b->key, 0); - unsigned threshold = bio->bi_rw & REQ_WRITE + unsigned threshold = op_is_write(bio_op(bio)) ?
c->congested_write_threshold_us : c->congested_read_threshold_us; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 29eba7219b01..6925023e12d4 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -54,11 +54,11 @@ reread: left = ca->sb.bucket_size - offset; bio_reset(bio); bio->bi_iter.bi_sector = bucket + offset; bio->bi_bdev = ca->bdev; - bio->bi_rw = READ; bio->bi_iter.bi_size = len << 9; bio->bi_end_io = journal_read_endio; bio->bi_private = &cl; + bio_set_op_attrs(bio, REQ_OP_READ, 0); bch_bio_map(bio, data); closure_bio_submit(bio, &cl); @@ -418,7 +418,7 @@ static void journal_discard_work(struct work_struct *work) struct journal_device *ja = container_of(work, struct journal_device, discard_work); - submit_bio(0, &ja->discard_bio); + submit_bio(&ja->discard_bio); } static void do_journal_discard(struct cache *ca) @@ -449,10 +449,10 @@ static void do_journal_discard(struct cache *ca) atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT); bio_init(bio); + bio_set_op_attrs(bio, REQ_OP_DISCARD, 0); bio->bi_iter.bi_sector = bucket_to_sector(ca->set, ca->sb.d[ja->discard_idx]); bio->bi_bdev = ca->bdev; - bio->bi_rw = REQ_WRITE|REQ_DISCARD; bio->bi_max_vecs = 1; bio->bi_io_vec = bio->bi_inline_vecs; bio->bi_iter.bi_size = bucket_bytes(ca); @@ -626,11 +626,12 @@ static void journal_write_unlocked(struct closure *cl) bio_reset(bio); bio->bi_iter.bi_sector = PTR_OFFSET(k, i); bio->bi_bdev = ca->bdev; - bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA; bio->bi_iter.bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; bio->bi_private = w; + bio_set_op_attrs(bio, REQ_OP_WRITE, + REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA); bch_bio_map(bio, w->data); trace_bcache_journal_write(bio); diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index b929fc944e9c..1881319f2298 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -163,7 +163,7 @@ static void read_moving(struct cache_set *c) moving_init(io); bio = &io->bio.bio; - bio->bi_rw = READ; + bio_set_op_attrs(bio, REQ_OP_READ, 0); bio->bi_end_io = read_moving_endio; if (bio_alloc_pages(bio, GFP_KERNEL)) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 25fa8445bb24..69f16f43f8ab 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -205,10 +205,10 @@ static void bch_data_insert_start(struct closure *cl) return bch_data_invalidate(cl); /* - * Journal writes are marked REQ_FLUSH; if the original write was a + * Journal writes are marked REQ_PREFLUSH; if the original write was a * flush, it'll wait on the journal write. 
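/*
 * A minimal sketch, not part of the merge, of the conversion the bcache
 * hunks around here repeat: the operation moves out of ad-hoc bi_rw
 * assignments into bio_set_op_attrs(), and submit_bio() loses its rw
 * argument. The example_* name is ours; the APIs are the 4.8-era ones
 * the patch itself uses.
 */
#include <linux/bio.h>

static void example_issue_read(struct bio *bio, struct block_device *bdev,
			       sector_t sector)
{
	bio->bi_bdev = bdev;
	bio->bi_iter.bi_sector = sector;
	/* old style: bio->bi_rw = READ; submit_bio(READ, bio); */
	bio_set_op_attrs(bio, REQ_OP_READ, 0);
	submit_bio(bio);
}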
*/ - bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA); + bio->bi_rw &= ~(REQ_PREFLUSH|REQ_FUA); do { unsigned i; @@ -253,7 +253,7 @@ static void bch_data_insert_start(struct closure *cl) trace_bcache_cache_insert(k); bch_keylist_push(&op->insert_keys); - n->bi_rw |= REQ_WRITE; + bio_set_op_attrs(n, REQ_OP_WRITE, 0); bch_submit_bbio(n, op->c, k, 0); } while (n != bio); @@ -378,12 +378,12 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || c->gc_stats.in_use > CUTOFF_CACHE_ADD || - (bio->bi_rw & REQ_DISCARD)) + (bio_op(bio) == REQ_OP_DISCARD)) goto skip; if (mode == CACHE_MODE_NONE || (mode == CACHE_MODE_WRITEAROUND && - (bio->bi_rw & REQ_WRITE))) + op_is_write(bio_op(bio)))) goto skip; if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || @@ -404,7 +404,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) if (!congested && mode == CACHE_MODE_WRITEBACK && - (bio->bi_rw & REQ_WRITE) && + op_is_write(bio_op(bio)) && (bio->bi_rw & REQ_SYNC)) goto rescale; @@ -657,7 +657,7 @@ static inline struct search *search_alloc(struct bio *bio, s->cache_miss = NULL; s->d = d; s->recoverable = 1; - s->write = (bio->bi_rw & REQ_WRITE) != 0; + s->write = op_is_write(bio_op(bio)); s->read_dirty_data = 0; s->start_time = jiffies; @@ -668,7 +668,7 @@ static inline struct search *search_alloc(struct bio *bio, s->iop.write_prio = 0; s->iop.error = 0; s->iop.flags = 0; - s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0; + s->iop.flush_journal = (bio->bi_rw & (REQ_PREFLUSH|REQ_FUA)) != 0; s->iop.wq = bcache_wq; return s; @@ -899,7 +899,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) * But check_overlapping drops dirty keys for which io hasn't started, * so we still want to call it. 
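/*
 * Illustration only (example_classify is a made-up name): with ops as
 * first-class values, request.c-style checks read bio_op() and
 * op_is_write() instead of testing REQ_WRITE/REQ_DISCARD bits in bi_rw.
 */
#include <linux/bio.h>

static const char *example_classify(struct bio *bio)
{
	if (bio_op(bio) == REQ_OP_DISCARD)	/* was: bio->bi_rw & REQ_DISCARD */
		return "discard";
	if (bio->bi_rw & REQ_PREFLUSH)		/* was: bio->bi_rw & REQ_FLUSH */
		return "flush";
	return op_is_write(bio_op(bio)) ? "write" : "read";
}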
*/ - if (bio->bi_rw & REQ_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD) s->iop.bypass = true; if (should_writeback(dc, s->orig_bio, @@ -913,22 +913,22 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) s->iop.bio = s->orig_bio; bio_get(s->iop.bio); - if (!(bio->bi_rw & REQ_DISCARD) || + if ((bio_op(bio) != REQ_OP_DISCARD) || blk_queue_discard(bdev_get_queue(dc->bdev))) closure_bio_submit(bio, cl); } else if (s->iop.writeback) { bch_writeback_add(dc); s->iop.bio = bio; - if (bio->bi_rw & REQ_FLUSH) { + if (bio->bi_rw & REQ_PREFLUSH) { /* Also need to send a flush to the backing device */ struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0, dc->disk.bio_split); - flush->bi_rw = WRITE_FLUSH; flush->bi_bdev = bio->bi_bdev; flush->bi_end_io = request_endio; flush->bi_private = cl; + bio_set_op_attrs(flush, REQ_OP_WRITE, WRITE_FLUSH); closure_bio_submit(flush, cl); } @@ -992,7 +992,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, cached_dev_read(dc, s); } } else { - if ((bio->bi_rw & REQ_DISCARD) && + if ((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(dc->bdev))) bio_endio(bio); else @@ -1103,7 +1103,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, &KEY(d->id, bio->bi_iter.bi_sector, 0), &KEY(d->id, bio_end_sector(bio), 0)); - s->iop.bypass = (bio->bi_rw & REQ_DISCARD) != 0; + s->iop.bypass = (bio_op(bio) == REQ_OP_DISCARD) != 0; s->iop.writeback = true; s->iop.bio = bio; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f5dbb4e884d8..88ef6d14cce3 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -134,7 +134,6 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, case BCACHE_SB_VERSION_CDEV: case BCACHE_SB_VERSION_CDEV_WITH_UUID: sb->nbuckets = le64_to_cpu(s->nbuckets); - sb->block_size = le16_to_cpu(s->block_size); sb->bucket_size = le16_to_cpu(s->bucket_size); sb->nr_in_set = le16_to_cpu(s->nr_in_set); @@ -212,8 +211,8 @@ static void __write_super(struct cache_sb *sb, struct bio *bio) unsigned i; bio->bi_iter.bi_sector = SB_SECTOR; - bio->bi_rw = REQ_SYNC|REQ_META; bio->bi_iter.bi_size = SB_SIZE; + bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META); bch_bio_map(bio, NULL); out->offset = cpu_to_le64(sb->offset); @@ -238,7 +237,7 @@ static void __write_super(struct cache_sb *sb, struct bio *bio) pr_debug("ver %llu, flags %llu, seq %llu", sb->version, sb->flags, sb->seq); - submit_bio(REQ_WRITE, bio); + submit_bio(bio); } static void bch_write_bdev_super_unlock(struct closure *cl) @@ -333,7 +332,7 @@ static void uuid_io_unlock(struct closure *cl) up(&c->uuid_write_mutex); } -static void uuid_io(struct cache_set *c, unsigned long rw, +static void uuid_io(struct cache_set *c, int op, unsigned long op_flags, struct bkey *k, struct closure *parent) { struct closure *cl = &c->uuid_write; @@ -348,21 +347,22 @@ static void uuid_io(struct cache_set *c, unsigned long rw, for (i = 0; i < KEY_PTRS(k); i++) { struct bio *bio = bch_bbio_alloc(c); - bio->bi_rw = REQ_SYNC|REQ_META|rw; + bio->bi_rw = REQ_SYNC|REQ_META|op_flags; bio->bi_iter.bi_size = KEY_SIZE(k) << 9; bio->bi_end_io = uuid_endio; bio->bi_private = cl; + bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags); bch_bio_map(bio, c->uuids); bch_submit_bbio(bio, c, k, i); - if (!(rw & WRITE)) + if (op != REQ_OP_WRITE) break; } bch_extent_to_text(buf, sizeof(buf), k); - pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", buf); + pr_debug("%s UUIDs at %s", op == REQ_OP_WRITE ? 
"wrote" : "read", buf); for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) if (!bch_is_zero(u->uuid, 16)) @@ -381,7 +381,7 @@ static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl) return "bad uuid pointer"; bkey_copy(&c->uuid_bucket, k); - uuid_io(c, READ_SYNC, k, cl); + uuid_io(c, REQ_OP_READ, READ_SYNC, k, cl); if (j->version < BCACHE_JSET_VERSION_UUIDv1) { struct uuid_entry_v0 *u0 = (void *) c->uuids; @@ -426,7 +426,7 @@ static int __uuid_write(struct cache_set *c) return 1; SET_KEY_SIZE(&k.key, c->sb.bucket_size); - uuid_io(c, REQ_WRITE, &k.key, &cl); + uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl); closure_sync(&cl); bkey_copy(&c->uuid_bucket, &k.key); @@ -498,7 +498,8 @@ static void prio_endio(struct bio *bio) closure_put(&ca->prio); } -static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw) +static void prio_io(struct cache *ca, uint64_t bucket, int op, + unsigned long op_flags) { struct closure *cl = &ca->prio; struct bio *bio = bch_bbio_alloc(ca->set); @@ -507,11 +508,11 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw) bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size; bio->bi_bdev = ca->bdev; - bio->bi_rw = REQ_SYNC|REQ_META|rw; bio->bi_iter.bi_size = bucket_bytes(ca); bio->bi_end_io = prio_endio; bio->bi_private = ca; + bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags); bch_bio_map(bio, ca->disk_buckets); closure_bio_submit(bio, &ca->prio); @@ -557,7 +558,7 @@ void bch_prio_write(struct cache *ca) BUG_ON(bucket == -1); mutex_unlock(&ca->set->bucket_lock); - prio_io(ca, bucket, REQ_WRITE); + prio_io(ca, bucket, REQ_OP_WRITE, 0); mutex_lock(&ca->set->bucket_lock); ca->prio_buckets[i] = bucket; @@ -599,7 +600,7 @@ static void prio_read(struct cache *ca, uint64_t bucket) ca->prio_last_buckets[bucket_nr] = bucket; bucket_nr++; - prio_io(ca, bucket, READ_SYNC); + prio_io(ca, bucket, REQ_OP_READ, READ_SYNC); if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8)) pr_warn("bad csum reading priorities"); @@ -1518,7 +1519,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || - !(c->moving_gc_wq = create_workqueue("bcache_gc")) || + !(c->moving_gc_wq = alloc_workqueue("bcache_gc", + WQ_MEM_RECLAIM, 0)) || bch_journal_alloc(c) || bch_btree_cache_alloc(c) || bch_open_buckets_alloc(c) || @@ -1803,7 +1805,7 @@ void bch_cache_release(struct kobject *kobj) module_put(THIS_MODULE); } -static int cache_alloc(struct cache_sb *sb, struct cache *ca) +static int cache_alloc(struct cache *ca) { size_t free; struct bucket *b; @@ -1858,7 +1860,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, if (blk_queue_discard(bdev_get_queue(ca->bdev))) ca->discard = CACHE_DISCARD(&ca->sb); - ret = cache_alloc(sb, ca); + ret = cache_alloc(ca); if (ret != 0) goto err; @@ -2097,7 +2099,7 @@ static int __init bcache_init(void) return bcache_major; } - if (!(bcache_wq = create_workqueue("bcache")) || + if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) || !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || sysfs_create_files(bcache_kobj, files) || bch_request_init() || diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 60123677b382..d9fd2a62e5f6 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -182,7 +182,7 @@ static void write_dirty(struct closure 
*cl) struct keybuf_key *w = io->bio.bi_private; dirty_init(w); - io->bio.bi_rw = WRITE; + bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0); io->bio.bi_iter.bi_sector = KEY_START(&w->key); io->bio.bi_bdev = io->dc->bdev; io->bio.bi_end_io = dirty_endio; @@ -251,10 +251,10 @@ static void read_dirty(struct cached_dev *dc) io->dc = dc; dirty_init(w); + bio_set_op_attrs(&io->bio, REQ_OP_READ, 0); io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0); io->bio.bi_bdev = PTR_CACHE(dc->disk.c, &w->key, 0)->bdev; - io->bio.bi_rw = READ; io->bio.bi_end_io = read_dirty_endio; if (bio_alloc_pages(&io->bio, GFP_KERNEL)) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index d8129ec93ebd..6fff794e0c72 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -162,7 +162,7 @@ static int read_sb_page(struct mddev *mddev, loff_t offset, if (sync_page_io(rdev, target, roundup(size, bdev_logical_block_size(rdev->bdev)), - page, READ, true)) { + page, REQ_OP_READ, 0, true)) { page->index = index; return 0; } @@ -297,7 +297,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait) atomic_inc(&bitmap->pending_writes); set_buffer_locked(bh); set_buffer_mapped(bh); - submit_bh(WRITE | REQ_SYNC, bh); + submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); bh = bh->b_this_page; } @@ -392,7 +392,7 @@ static int read_page(struct file *file, unsigned long index, atomic_inc(&bitmap->pending_writes); set_buffer_locked(bh); set_buffer_mapped(bh); - submit_bh(READ, bh); + submit_bh(REQ_OP_READ, 0, bh); } block++; bh = bh->b_this_page; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index cd77216beff1..6571c81465e1 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -574,7 +574,8 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block, { int r; struct dm_io_request io_req = { - .bi_rw = rw, + .bi_op = rw, + .bi_op_flags = 0, .notify.fn = dmio_complete, .notify.context = b, .client = b->c->dm_io, @@ -634,6 +635,7 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, * the dm_buffer's inline bio is local to bufio. */ b->bio.bi_private = end_io; + bio_set_op_attrs(&b->bio, rw, 0); /* * We assume that if len >= PAGE_SIZE ptr is page-aligned. 
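/*
 * Sketch of the dm-io request shape the dm-bufio hunks above converge
 * on: dm_io_request now carries the operation (bi_op) and its flags
 * (bi_op_flags) separately. example_issue_flush is our name; it assumes
 * a valid dm_io client and mirrors dm_bufio_issue_flush() above.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/dm-io.h>

static int example_issue_flush(struct dm_io_client *client,
			       struct dm_io_region *where)
{
	struct dm_io_request io_req = {
		.bi_op = REQ_OP_WRITE,
		.bi_op_flags = WRITE_FLUSH,	/* was: .bi_rw = WRITE_FLUSH */
		.mem.type = DM_IO_KMEM,
		.mem.ptr.addr = NULL,		/* zero-length flush */
		.client = client,
		.notify.fn = NULL,		/* NULL notify -> synchronous dm_io() */
	};

	return dm_io(&io_req, 1, where, NULL);
}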
@@ -660,7 +662,7 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, ptr += PAGE_SIZE; } while (len > 0); - submit_bio(rw, &b->bio); + submit_bio(&b->bio); } static void submit_io(struct dm_buffer *b, int rw, sector_t block, @@ -1326,7 +1328,8 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); int dm_bufio_issue_flush(struct dm_bufio_client *c) { struct dm_io_request io_req = { - .bi_rw = WRITE_FLUSH, + .bi_op = REQ_OP_WRITE, + .bi_op_flags = WRITE_FLUSH, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = c->dm_io, diff --git a/drivers/md/dm-builtin.c b/drivers/md/dm-builtin.c index 6c9049c51b2b..f092771878c2 100644 --- a/drivers/md/dm-builtin.c +++ b/drivers/md/dm-builtin.c @@ -1,4 +1,4 @@ -#include "dm.h" +#include "dm-core.h" /* * The kobject release method must not be placed in the module itself, diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index ee0510f9a85e..718744db62df 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -788,7 +788,8 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) spin_lock_irqsave(&cache->lock, flags); if (cache->need_tick_bio && - !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) { + !(bio->bi_rw & (REQ_FUA | REQ_PREFLUSH)) && + bio_op(bio) != REQ_OP_DISCARD) { pb->tick = true; cache->need_tick_bio = false; } @@ -829,7 +830,7 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) static int bio_triggers_commit(struct cache *cache, struct bio *bio) { - return bio->bi_rw & (REQ_FLUSH | REQ_FUA); + return bio->bi_rw & (REQ_PREFLUSH | REQ_FUA); } /* @@ -851,7 +852,7 @@ static void inc_ds(struct cache *cache, struct bio *bio, static bool accountable_bio(struct cache *cache, struct bio *bio) { return ((bio->bi_bdev == cache->origin_dev->bdev) && - !(bio->bi_rw & REQ_DISCARD)); + bio_op(bio) != REQ_OP_DISCARD); } static void accounted_begin(struct cache *cache, struct bio *bio) @@ -1067,7 +1068,8 @@ static void dec_io_migrations(struct cache *cache) static bool discard_or_flush(struct bio *bio) { - return bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD); + return bio_op(bio) == REQ_OP_DISCARD || + bio->bi_rw & (REQ_PREFLUSH | REQ_FUA); } static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell) @@ -1612,8 +1614,8 @@ static void process_flush_bio(struct cache *cache, struct bio *bio) remap_to_cache(cache, bio, 0); /* - * REQ_FLUSH is not directed at any particular block so we don't - * need to inc_ds(). REQ_FUA's are split into a write + REQ_FLUSH + * REQ_PREFLUSH is not directed at any particular block so we don't + * need to inc_ds(). REQ_FUA's are split into a write + REQ_PREFLUSH * by dm-core. */ issue(cache, bio); @@ -1978,9 +1980,9 @@ static void process_deferred_bios(struct cache *cache) bio = bio_list_pop(&bios); - if (bio->bi_rw & REQ_FLUSH) + if (bio->bi_rw & REQ_PREFLUSH) process_flush_bio(cache, bio); - else if (bio->bi_rw & REQ_DISCARD) + else if (bio_op(bio) == REQ_OP_DISCARD) process_discard_bio(cache, &structs, bio); else process_bio(cache, &structs, bio); diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h new file mode 100644 index 000000000000..40ceba1fe8be --- /dev/null +++ b/drivers/md/dm-core.h @@ -0,0 +1,149 @@ +/* + * Internal header file _only_ for device mapper core + * + * Copyright (C) 2016 Red Hat, Inc. All rights reserved. + * + * This file is released under the LGPL. 
+ */ + +#ifndef DM_CORE_INTERNAL_H +#define DM_CORE_INTERNAL_H + +#include <linux/kthread.h> +#include <linux/ktime.h> +#include <linux/blk-mq.h> + +#include <trace/events/block.h> + +#include "dm.h" + +#define DM_RESERVED_MAX_IOS 1024 + +struct dm_kobject_holder { + struct kobject kobj; + struct completion completion; +}; + +/* + * DM core internal structure that is used directly by dm.c and dm-rq.c + * DM targets must _not_ dereference a mapped_device to directly access its members! + */ +struct mapped_device { + struct srcu_struct io_barrier; + struct mutex suspend_lock; + + /* + * The current mapping (struct dm_table *). + * Use dm_get_live_table{_fast} or take suspend_lock for + * dereference. + */ + void __rcu *map; + + struct list_head table_devices; + struct mutex table_devices_lock; + + unsigned long flags; + + struct request_queue *queue; + int numa_node_id; + + unsigned type; + /* Protect queue and type against concurrent access. */ + struct mutex type_lock; + + atomic_t holders; + atomic_t open_count; + + struct dm_target *immutable_target; + struct target_type *immutable_target_type; + + struct gendisk *disk; + char name[16]; + + void *interface_ptr; + + /* + * A list of ios that arrived while we were suspended. + */ + atomic_t pending[2]; + wait_queue_head_t wait; + struct work_struct work; + spinlock_t deferred_lock; + struct bio_list deferred; + + /* + * Event handling. + */ + wait_queue_head_t eventq; + atomic_t event_nr; + atomic_t uevent_seq; + struct list_head uevent_list; + spinlock_t uevent_lock; /* Protect access to uevent_list */ + + /* the number of internal suspends */ + unsigned internal_suspend_count; + + /* + * Processing queue (flush) + */ + struct workqueue_struct *wq; + + /* + * io objects are allocated from here. + */ + mempool_t *io_pool; + mempool_t *rq_pool; + + struct bio_set *bs; + + /* + * freeze/thaw support requires holding onto a super block + */ + struct super_block *frozen_sb; + + /* forced geometry settings */ + struct hd_geometry geometry; + + struct block_device *bdev; + + /* kobject and completion */ + struct dm_kobject_holder kobj_holder; + + /* zero-length flush that will be cloned and submitted to targets */ + struct bio flush_bio; + + struct dm_stats stats; + + struct kthread_worker kworker; + struct task_struct *kworker_task; + + /* for request-based merge heuristic in dm_request_fn() */ + unsigned seq_rq_merge_deadline_usecs; + int last_rq_rw; + sector_t last_rq_pos; + ktime_t last_rq_start_time; + + /* for blk-mq request-based DM support */ + struct blk_mq_tag_set *tag_set; + bool use_blk_mq:1; + bool init_tio_pdu:1; +}; + +void dm_init_md_queue(struct mapped_device *md); +void dm_init_normal_md_queue(struct mapped_device *md); +int md_in_flight(struct mapped_device *md); +void disable_write_same(struct mapped_device *md); + +static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) +{ + return &container_of(kobj, struct dm_kobject_holder, kobj)->completion; +} + +unsigned __dm_get_module_param(unsigned *module_param, unsigned def, unsigned max); + +static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen) +{ + return !maxlen || strlen(result) + 1 >= maxlen; +} + +#endif diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 4f3cb3554944..8f2e3e2ffd26 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -683,7 +683,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc, u8 *data) { struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; - u64 sector = cpu_to_le64((u64)dmreq->iv_sector); + __le64 sector = cpu_to_le64(dmreq->iv_sector);
u8 buf[TCW_WHITENING_SIZE]; SHASH_DESC_ON_STACK(desc, tcw->crc32_tfm); int i, r; @@ -722,7 +722,7 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; - u64 sector = cpu_to_le64((u64)dmreq->iv_sector); + __le64 sector = cpu_to_le64(dmreq->iv_sector); u8 *src; int r = 0; @@ -1136,7 +1136,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) clone->bi_private = io; clone->bi_end_io = crypt_endio; clone->bi_bdev = cc->dev->bdev; - clone->bi_rw = io->base_bio->bi_rw; + bio_set_op_attrs(clone, bio_op(io->base_bio), io->base_bio->bi_rw); } static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) @@ -1911,11 +1911,12 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) struct crypt_config *cc = ti->private; /* - * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues. - * - for REQ_FLUSH device-mapper core ensures that no IO is in-flight - * - for REQ_DISCARD caller must use flush if IO ordering matters + * If bio is REQ_PREFLUSH or REQ_OP_DISCARD, just bypass crypt queues. + * - for REQ_PREFLUSH device-mapper core ensures that no IO is in-flight + * - for REQ_OP_DISCARD caller must use flush if IO ordering matters */ - if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { + if (unlikely(bio->bi_rw & REQ_PREFLUSH || + bio_op(bio) == REQ_OP_DISCARD)) { bio->bi_bdev = cc->dev->bdev; if (bio_sectors(bio)) bio->bi_iter.bi_sector = cc->start + diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 665bf3285618..2faf49d8f4d7 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1540,9 +1540,9 @@ static int era_map(struct dm_target *ti, struct bio *bio) remap_to_origin(era, bio); /* - * REQ_FLUSH bios carry no data, so we're not interested in them. + * REQ_PREFLUSH bios carry no data, so we're not interested in them. */ - if (!(bio->bi_rw & REQ_FLUSH) && + if (!(bio->bi_rw & REQ_PREFLUSH) && (bio_data_dir(bio) == WRITE) && !metadata_current_marked(era->md, block)) { defer_bio(era, bio); diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index b7341de87015..29b99fb6a16a 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -266,7 +266,7 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " - "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n", + "(rw=%c bi_rw=%u bi_sector=%llu cur_bytes=%u)\n", bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_rw, (unsigned long long)bio->bi_iter.bi_sector, bio_bytes); diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 06d426eb5a30..daa03e41654a 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -5,7 +5,7 @@ * This file is released under the GPL. */ -#include "dm.h" +#include "dm-core.h" #include <linux/device-mapper.h> @@ -278,8 +278,9 @@ static void km_dp_init(struct dpages *dp, void *data) /*----------------------------------------------------------------- * IO routines that accept a list of pages.
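/*
 * Illustration of the per-op limit selection the reworked do_region()
 * performs in the hunk below (names shortened, the regular read/write
 * path elided; example_* is our name, not the patch's).
 */
#include <linux/blkdev.h>

static unsigned int example_special_cmd_max_sectors(struct request_queue *q,
						    int op)
{
	if (op == REQ_OP_DISCARD)		/* was: rw & REQ_DISCARD */
		return q->limits.max_discard_sectors;
	if (op == REQ_OP_WRITE_SAME)		/* was: rw & REQ_WRITE_SAME */
		return q->limits.max_write_same_sectors;
	return UINT_MAX;	/* plain reads/writes: no special cap here */
}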
*---------------------------------------------------------------*/ -static void do_region(int rw, unsigned region, struct dm_io_region *where, - struct dpages *dp, struct io *io) +static void do_region(int op, int op_flags, unsigned region, + struct dm_io_region *where, struct dpages *dp, + struct io *io) { struct bio *bio; struct page *page; @@ -295,24 +296,25 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, /* * Reject unsupported discard and write same requests. */ - if (rw & REQ_DISCARD) + if (op == REQ_OP_DISCARD) special_cmd_max_sectors = q->limits.max_discard_sectors; - else if (rw & REQ_WRITE_SAME) + else if (op == REQ_OP_WRITE_SAME) special_cmd_max_sectors = q->limits.max_write_same_sectors; - if ((rw & (REQ_DISCARD | REQ_WRITE_SAME)) && special_cmd_max_sectors == 0) { + if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_SAME) && + special_cmd_max_sectors == 0) { dec_count(io, region, -EOPNOTSUPP); return; } /* - * where->count may be zero if rw holds a flush and we need to + * where->count may be zero if op holds a flush and we need to * send a zero-sized flush. */ do { /* * Allocate a suitably sized-bio. */ - if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME)) + if ((op == REQ_OP_DISCARD) || (op == REQ_OP_WRITE_SAME)) num_bvecs = 1; else num_bvecs = min_t(int, BIO_MAX_PAGES, @@ -322,13 +324,14 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, bio->bi_iter.bi_sector = where->sector + (where->count - remaining); bio->bi_bdev = where->bdev; bio->bi_end_io = endio; + bio_set_op_attrs(bio, op, op_flags); store_io_and_region_in_bio(bio, io, region); - if (rw & REQ_DISCARD) { + if (op == REQ_OP_DISCARD) { num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining); bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT; remaining -= num_sectors; - } else if (rw & REQ_WRITE_SAME) { + } else if (op == REQ_OP_WRITE_SAME) { /* * WRITE SAME only uses a single page. 
*/ @@ -355,11 +358,11 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, } atomic_inc(&io->count); - submit_bio(rw, bio); + submit_bio(bio); } while (remaining); } -static void dispatch_io(int rw, unsigned int num_regions, +static void dispatch_io(int op, int op_flags, unsigned int num_regions, struct dm_io_region *where, struct dpages *dp, struct io *io, int sync) { @@ -369,7 +372,7 @@ static void dispatch_io(int rw, unsigned int num_regions, BUG_ON(num_regions > DM_IO_MAX_REGIONS); if (sync) - rw |= REQ_SYNC; + op_flags |= REQ_SYNC; /* * For multiple regions we need to be careful to rewind @@ -377,8 +380,8 @@ static void dispatch_io(int rw, unsigned int num_regions, */ for (i = 0; i < num_regions; i++) { *dp = old_pages; - if (where[i].count || (rw & REQ_FLUSH)) - do_region(rw, i, where + i, dp, io); + if (where[i].count || (op_flags & REQ_PREFLUSH)) + do_region(op, op_flags, i, where + i, dp, io); } /* @@ -402,13 +405,13 @@ static void sync_io_complete(unsigned long error, void *context) } static int sync_io(struct dm_io_client *client, unsigned int num_regions, - struct dm_io_region *where, int rw, struct dpages *dp, - unsigned long *error_bits) + struct dm_io_region *where, int op, int op_flags, + struct dpages *dp, unsigned long *error_bits) { struct io *io; struct sync_io sio; - if (num_regions > 1 && (rw & RW_MASK) != WRITE) { + if (num_regions > 1 && !op_is_write(op)) { WARN_ON(1); return -EIO; } @@ -425,7 +428,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(rw, num_regions, where, dp, io, 1); + dispatch_io(op, op_flags, num_regions, where, dp, io, 1); wait_for_completion_io(&sio.wait); @@ -436,12 +439,12 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, } static int async_io(struct dm_io_client *client, unsigned int num_regions, - struct dm_io_region *where, int rw, struct dpages *dp, - io_notify_fn fn, void *context) + struct dm_io_region *where, int op, int op_flags, + struct dpages *dp, io_notify_fn fn, void *context) { struct io *io; - if (num_regions > 1 && (rw & RW_MASK) != WRITE) { + if (num_regions > 1 && !op_is_write(op)) { WARN_ON(1); fn(1, context); return -EIO; @@ -457,7 +460,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(rw, num_regions, where, dp, io, 0); + dispatch_io(op, op_flags, num_regions, where, dp, io, 0); return 0; } @@ -480,7 +483,7 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, case DM_IO_VMA: flush_kernel_vmap_range(io_req->mem.ptr.vma, size); - if ((io_req->bi_rw & RW_MASK) == READ) { + if (io_req->bi_op == REQ_OP_READ) { dp->vma_invalidate_address = io_req->mem.ptr.vma; dp->vma_invalidate_size = size; } @@ -518,10 +521,12 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions, if (!io_req->notify.fn) return sync_io(io_req->client, num_regions, where, - io_req->bi_rw, &dp, sync_error_bits); + io_req->bi_op, io_req->bi_op_flags, &dp, + sync_error_bits); - return async_io(io_req->client, num_regions, where, io_req->bi_rw, - &dp, io_req->notify.fn, io_req->notify.context); + return async_io(io_req->client, num_regions, where, io_req->bi_op, + io_req->bi_op_flags, &dp, io_req->notify.fn, + io_req->notify.context); } EXPORT_SYMBOL(dm_io); diff --git 
a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 2c7ca258c4e4..966eb4b61aed 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -5,7 +5,7 @@ * This file is released under the GPL. */ -#include "dm.h" +#include "dm-core.h" #include <linux/module.h> #include <linux/vmalloc.h> @@ -1267,6 +1267,15 @@ static int populate_table(struct dm_table *table, return dm_table_complete(table); } +static bool is_valid_type(unsigned cur, unsigned new) +{ + if (cur == new || + (cur == DM_TYPE_BIO_BASED && new == DM_TYPE_DAX_BIO_BASED)) + return true; + + return false; +} + static int table_load(struct dm_ioctl *param, size_t param_size) { int r; @@ -1309,7 +1318,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size) DMWARN("unable to set up device queue for new table."); goto err_unlock_md_type; } - } else if (dm_get_md_type(md) != dm_table_get_type(t)) { + } else if (!is_valid_type(dm_get_md_type(md), dm_table_get_type(t))) { DMWARN("can't change device type after initial table load."); r = -EINVAL; goto err_unlock_md_type; @@ -1670,8 +1679,7 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) return r; } -#define DM_PARAMS_KMALLOC 0x0001 /* Params alloced with kmalloc */ -#define DM_PARAMS_VMALLOC 0x0002 /* Params alloced with vmalloc */ +#define DM_PARAMS_MALLOC 0x0001 /* Params allocated with kvmalloc() */ #define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */ static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags) @@ -1679,10 +1687,8 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla if (param_flags & DM_WIPE_BUFFER) memset(param, 0, param_size); - if (param_flags & DM_PARAMS_KMALLOC) - kfree(param); - if (param_flags & DM_PARAMS_VMALLOC) - vfree(param); + if (param_flags & DM_PARAMS_MALLOC) + kvfree(param); } static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel, @@ -1714,19 +1720,14 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern * Use kmalloc() rather than vmalloc() when we can.
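/*
 * A condensed sketch (example_alloc_params is our name) of the allocation
 * pattern copy_params() lands on in the hunk below: try kmalloc() first,
 * fall back to __vmalloc(), and let kvfree() pick the matching release
 * path so the single DM_PARAMS_MALLOC flag suffices. The real code also
 * brackets __vmalloc() with memalloc_noio_save()/restore().
 */
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

static void *example_alloc_params(size_t size)
{
	void *p = NULL;

	if (size <= KMALLOC_MAX_SIZE)
		p = kmalloc(size, GFP_NOIO | __GFP_NORETRY |
			    __GFP_NOMEMALLOC | __GFP_NOWARN);
	if (!p)
		p = __vmalloc(size, GFP_NOIO | __GFP_HIGH | __GFP_HIGHMEM,
			      PAGE_KERNEL);
	return p;	/* either way, release with kvfree(p) */
}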
*/ dmi = NULL; - if (param_kernel->data_size <= KMALLOC_MAX_SIZE) { + if (param_kernel->data_size <= KMALLOC_MAX_SIZE) dmi = kmalloc(param_kernel->data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); - if (dmi) - *param_flags |= DM_PARAMS_KMALLOC; - } if (!dmi) { unsigned noio_flag; noio_flag = memalloc_noio_save(); dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_HIGH | __GFP_HIGHMEM, PAGE_KERNEL); memalloc_noio_restore(noio_flag); - if (dmi) - *param_flags |= DM_PARAMS_VMALLOC; } if (!dmi) { @@ -1735,6 +1736,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern return -ENOMEM; } + *param_flags |= DM_PARAMS_MALLOC; + if (copy_from_user(dmi, user, param_kernel->data_size)) goto bad; diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 1452ed9aacb4..9e9d04cb7d51 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -26,7 +26,7 @@ #include <linux/device-mapper.h> #include <linux/dm-kcopyd.h> -#include "dm.h" +#include "dm-core.h" #define SUB_JOB_SIZE 128 #define SPLIT_COUNT 8 @@ -465,7 +465,7 @@ static void complete_io(unsigned long error, void *context) io_job_finish(kc->throttle); if (error) { - if (job->rw & WRITE) + if (op_is_write(job->rw)) job->write_err |= error; else job->read_err = 1; @@ -477,7 +477,7 @@ static void complete_io(unsigned long error, void *context) } } - if (job->rw & WRITE) + if (op_is_write(job->rw)) push(&kc->complete_jobs, job); else { @@ -496,7 +496,8 @@ static int run_io_job(struct kcopyd_job *job) { int r; struct dm_io_request io_req = { - .bi_rw = job->rw, + .bi_op = job->rw, + .bi_op_flags = 0, .mem.type = DM_IO_PAGE_LIST, .mem.ptr.pl = job->pages, .mem.offset = 0, @@ -550,7 +551,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, if (r < 0) { /* error this rogue job */ - if (job->rw & WRITE) + if (op_is_write(job->rw)) job->write_err = (unsigned long) -1L; else job->read_err = 1; @@ -734,7 +735,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, /* * Use WRITE SAME to optimize zeroing if all dests support it.
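/*
 * Sketch of the capability probe behind the kcopyd change just below:
 * zeroing may use WRITE SAME only when every destination advertises
 * support, otherwise job->rw falls back to a plain WRITE. The example_*
 * name is ours.
 */
#include <linux/blkdev.h>
#include <linux/dm-io.h>

static bool example_all_dests_support_write_same(struct dm_io_region *dests,
						 unsigned int num_dests)
{
	unsigned int i;

	for (i = 0; i < num_dests; i++)
		if (!bdev_write_same(dests[i].bdev))
			return false;	/* caller falls back to plain writes */
	return true;
}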
*/ - job->rw = WRITE | REQ_WRITE_SAME; + job->rw = REQ_OP_WRITE_SAME; for (i = 0; i < job->num_dests; i++) if (!bdev_write_same(job->dests[i].bdev)) { job->rw = WRITE; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 05c35aacb3aa..6d35dd4e9efb 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -141,9 +141,27 @@ static int linear_iterate_devices(struct dm_target *ti, return fn(ti, lc->dev, lc->start, ti->len, data); } +static long linear_direct_access(struct dm_target *ti, sector_t sector, + void __pmem **kaddr, pfn_t *pfn, long size) +{ + struct linear_c *lc = ti->private; + struct block_device *bdev = lc->dev->bdev; + struct blk_dax_ctl dax = { + .sector = linear_map_sector(ti, sector), + .size = size, + }; + long ret; + + ret = bdev_direct_access(bdev, &dax); + *kaddr = dax.addr; + *pfn = dax.pfn; + + return ret; +} + static struct target_type linear_target = { .name = "linear", - .version = {1, 2, 1}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, @@ -151,6 +169,7 @@ static struct target_type linear_target = { .status = linear_status, .prepare_ioctl = linear_prepare_ioctl, .iterate_devices = linear_iterate_devices, + .direct_access = linear_direct_access, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 608302e222af..b5dbf7a0515e 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -205,6 +205,7 @@ static int write_metadata(struct log_writes_c *lc, void *entry, bio->bi_bdev = lc->logdev->bdev; bio->bi_end_io = log_end_io; bio->bi_private = lc; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); page = alloc_page(GFP_KERNEL); if (!page) { @@ -226,7 +227,7 @@ static int write_metadata(struct log_writes_c *lc, void *entry, DMERR("Couldn't add page to the log block"); goto error_bio; } - submit_bio(WRITE, bio); + submit_bio(bio); return 0; error_bio: bio_put(bio); @@ -269,6 +270,7 @@ static int log_one_block(struct log_writes_c *lc, bio->bi_bdev = lc->logdev->bdev; bio->bi_end_io = log_end_io; bio->bi_private = lc; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); for (i = 0; i < block->vec_cnt; i++) { /* @@ -279,7 +281,7 @@ static int log_one_block(struct log_writes_c *lc, block->vecs[i].bv_len, 0); if (ret != block->vecs[i].bv_len) { atomic_inc(&lc->io_blocks); - submit_bio(WRITE, bio); + submit_bio(bio); bio = bio_alloc(GFP_KERNEL, block->vec_cnt - i); if (!bio) { DMERR("Couldn't alloc log bio"); @@ -290,6 +292,7 @@ static int log_one_block(struct log_writes_c *lc, bio->bi_bdev = lc->logdev->bdev; bio->bi_end_io = log_end_io; bio->bi_private = lc; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); ret = bio_add_page(bio, block->vecs[i].bv_page, block->vecs[i].bv_len, 0); @@ -301,7 +304,7 @@ static int log_one_block(struct log_writes_c *lc, } sector += block->vecs[i].bv_len >> SECTOR_SHIFT; } - submit_bio(WRITE, bio); + submit_bio(bio); out: kfree(block->data); kfree(block); @@ -552,9 +555,9 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) struct bio_vec bv; size_t alloc_size; int i = 0; - bool flush_bio = (bio->bi_rw & REQ_FLUSH); + bool flush_bio = (bio->bi_rw & REQ_PREFLUSH); bool fua_bio = (bio->bi_rw & REQ_FUA); - bool discard_bio = (bio->bi_rw & REQ_DISCARD); + bool discard_bio = (bio_op(bio) == REQ_OP_DISCARD); pb->block = NULL; diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 627d19186d5a..4ca2d1df5b44 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -293,7 +293,7 @@ static void header_from_disk(struct 
log_header_core *core, struct log_header_dis static int rw_header(struct log_c *lc, int rw) { - lc->io_req.bi_rw = rw; + lc->io_req.bi_op = rw; return dm_io(&lc->io_req, 1, &lc->header_location, NULL); } @@ -306,7 +306,8 @@ static int flush_header(struct log_c *lc) .count = 0, }; - lc->io_req.bi_rw = WRITE_FLUSH; + lc->io_req.bi_op = REQ_OP_WRITE; + lc->io_req.bi_op_flags = WRITE_FLUSH; return dm_io(&lc->io_req, 1, &null_location, NULL); } diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 52baf8a5b0f4..7eac080fcb18 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -7,7 +7,8 @@ #include <linux/device-mapper.h> -#include "dm.h" +#include "dm-rq.h" +#include "dm-bio-record.h" #include "dm-path-selector.h" #include "dm-uevent.h" @@ -89,6 +90,8 @@ struct multipath { atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */ atomic_t pg_init_count; /* Number of times pg_init called */ + unsigned queue_mode; + /* * We must use a mempool of dm_mpath_io structs so that we * can resubmit bios on error. @@ -97,10 +100,13 @@ struct multipath { struct mutex work_mutex; struct work_struct trigger_event; + + struct work_struct process_queued_bios; + struct bio_list queued_bios; }; /* - * Context information attached to each bio we process. + * Context information attached to each io we process. */ struct dm_mpath_io { struct pgpath *pgpath; @@ -114,6 +120,7 @@ static struct kmem_cache *_mpio_cache; static struct workqueue_struct *kmultipathd, *kmpath_handlerd; static void trigger_event(struct work_struct *work); static void activate_path(struct work_struct *work); +static void process_queued_bios(struct work_struct *work); /*----------------------------------------------- * Multipath state flags. @@ -185,7 +192,7 @@ static void free_priority_group(struct priority_group *pg, kfree(pg); } -static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq) +static struct multipath *alloc_multipath(struct dm_target *ti) { struct multipath *m; @@ -203,15 +210,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq) mutex_init(&m->work_mutex); m->mpio_pool = NULL; - if (!use_blk_mq) { - unsigned min_ios = dm_get_reserved_rq_based_ios(); - - m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache); - if (!m->mpio_pool) { - kfree(m); - return NULL; - } - } + m->queue_mode = DM_TYPE_NONE; m->ti = ti; ti->private = m; @@ -220,6 +219,39 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq) return m; } +static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) +{ + if (m->queue_mode == DM_TYPE_NONE) { + /* + * Default to request-based. + */ + if (dm_use_blk_mq(dm_table_get_md(ti->table))) + m->queue_mode = DM_TYPE_MQ_REQUEST_BASED; + else + m->queue_mode = DM_TYPE_REQUEST_BASED; + } + + if (m->queue_mode == DM_TYPE_REQUEST_BASED) { + unsigned min_ios = dm_get_reserved_rq_based_ios(); + + m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache); + if (!m->mpio_pool) + return -ENOMEM; + } + else if (m->queue_mode == DM_TYPE_BIO_BASED) { + INIT_WORK(&m->process_queued_bios, process_queued_bios); + /* + * bio-based doesn't support any direct scsi_dh management; + * it just discovers if a scsi_dh is attached.
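/*
 * Restating the default selection added in alloc_multipath_stage2()
 * above (illustrative helper, not in the patch): an unset queue mode
 * becomes request-based, picking the blk-mq flavour when the
 * mapped_device uses blk-mq. dm_use_blk_mq() is the driver-internal
 * helper from drivers/md/dm.h; DM_TYPE_* come from device-mapper.h.
 */
#include <linux/device-mapper.h>

static unsigned example_default_queue_mode(struct mapped_device *md,
					   unsigned queue_mode)
{
	if (queue_mode != DM_TYPE_NONE)
		return queue_mode;	/* explicitly configured by the table */
	return dm_use_blk_mq(md) ? DM_TYPE_MQ_REQUEST_BASED
				 : DM_TYPE_REQUEST_BASED;
}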
+ */ + set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags); + } + + dm_table_set_type(ti->table, m->queue_mode); + + return 0; +} + static void free_multipath(struct multipath *m) { struct priority_group *pg, *tmp; @@ -272,6 +304,41 @@ static void clear_request_fn_mpio(struct multipath *m, union map_info *info) } } +static size_t multipath_per_bio_data_size(void) +{ + return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details); +} + +static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio) +{ + return dm_per_bio_data(bio, multipath_per_bio_data_size()); +} + +static struct dm_bio_details *get_bio_details_from_bio(struct bio *bio) +{ + /* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */ + struct dm_mpath_io *mpio = get_mpio_from_bio(bio); + void *bio_details = mpio + 1; + + return bio_details; +} + +static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p, + struct dm_bio_details **bio_details_p) +{ + struct dm_mpath_io *mpio = get_mpio_from_bio(bio); + struct dm_bio_details *bio_details = get_bio_details_from_bio(bio); + + memset(mpio, 0, sizeof(*mpio)); + memset(bio_details, 0, sizeof(*bio_details)); + dm_bio_record(bio_details, bio); + + if (mpio_p) + *mpio_p = mpio; + if (bio_details_p) + *bio_details_p = bio_details; +} + /*----------------------------------------------- * Path selection *-----------------------------------------------*/ @@ -431,16 +498,26 @@ failed: * and multipath_resume() calls and we have no need to check * for the DMF_NOFLUSH_SUSPENDING flag. */ -static int must_push_back(struct multipath *m) +static bool __must_push_back(struct multipath *m) +{ + return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) != + test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) && + dm_noflush_suspending(m->ti)); +} + +static bool must_push_back_rq(struct multipath *m) { return (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || - ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) != - test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) && - dm_noflush_suspending(m->ti))); + __must_push_back(m)); +} + +static bool must_push_back_bio(struct multipath *m) +{ + return __must_push_back(m); } /* - * Map cloned requests + * Map cloned requests (request-based multipath) */ static int __multipath_map(struct dm_target *ti, struct request *clone, union map_info *map_context, @@ -459,7 +536,7 @@ static int __multipath_map(struct dm_target *ti, struct request *clone, pgpath = choose_pgpath(m, nr_bytes); if (!pgpath) { - if (!must_push_back(m)) + if (!must_push_back_rq(m)) r = -EIO; /* Failed */ return r; } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || @@ -529,6 +606,108 @@ static void multipath_release_clone(struct request *clone) blk_mq_free_request(clone); } +/* + * Map cloned bios (bio-based multipath) + */ +static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_mpath_io *mpio) +{ + size_t nr_bytes = bio->bi_iter.bi_size; + struct pgpath *pgpath; + unsigned long flags; + bool queue_io; + + /* Do we need to select a new pgpath? 
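The three helpers above carve a fixed layout out of each bio's per-target data area; a sketch of that layout (the space is reserved via ti->per_io_data_size later in the constructor):

    /*
     * Area returned by dm_per_bio_data() for bio-based multipath:
     *
     *   +--------------------+------------------------+
     *   | struct dm_mpath_io | struct dm_bio_details  |
     *   +--------------------+------------------------+
     *   ^ get_mpio_from_bio()  ^ get_bio_details_from_bio() == mpio + 1
     */
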
*/ + pgpath = lockless_dereference(m->current_pgpath); + queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); + if (!pgpath || !queue_io) + pgpath = choose_pgpath(m, nr_bytes); + + if ((pgpath && queue_io) || + (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) { + /* Queue for the daemon to resubmit */ + spin_lock_irqsave(&m->lock, flags); + bio_list_add(&m->queued_bios, bio); + spin_unlock_irqrestore(&m->lock, flags); + /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */ + if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) + pg_init_all_paths(m); + else if (!queue_io) + queue_work(kmultipathd, &m->process_queued_bios); + return DM_MAPIO_SUBMITTED; + } + + if (!pgpath) { + if (!must_push_back_bio(m)) + return -EIO; + return DM_MAPIO_REQUEUE; + } + + mpio->pgpath = pgpath; + mpio->nr_bytes = nr_bytes; + + bio->bi_error = 0; + bio->bi_bdev = pgpath->path.dev->bdev; + bio->bi_rw |= REQ_FAILFAST_TRANSPORT; + + if (pgpath->pg->ps.type->start_io) + pgpath->pg->ps.type->start_io(&pgpath->pg->ps, + &pgpath->path, + nr_bytes); + return DM_MAPIO_REMAPPED; +} + +static int multipath_map_bio(struct dm_target *ti, struct bio *bio) +{ + struct multipath *m = ti->private; + struct dm_mpath_io *mpio = NULL; + + multipath_init_per_bio_data(bio, &mpio, NULL); + + return __multipath_map_bio(m, bio, mpio); +} + +static void process_queued_bios_list(struct multipath *m) +{ + if (m->queue_mode == DM_TYPE_BIO_BASED) + queue_work(kmultipathd, &m->process_queued_bios); +} + +static void process_queued_bios(struct work_struct *work) +{ + int r; + unsigned long flags; + struct bio *bio; + struct bio_list bios; + struct blk_plug plug; + struct multipath *m = + container_of(work, struct multipath, process_queued_bios); + + bio_list_init(&bios); + + spin_lock_irqsave(&m->lock, flags); + + if (bio_list_empty(&m->queued_bios)) { + spin_unlock_irqrestore(&m->lock, flags); + return; + } + + bio_list_merge(&bios, &m->queued_bios); + bio_list_init(&m->queued_bios); + + spin_unlock_irqrestore(&m->lock, flags); + + blk_start_plug(&plug); + while ((bio = bio_list_pop(&bios))) { + r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio)); + if (r < 0 || r == DM_MAPIO_REQUEUE) { + bio->bi_error = r; + bio_endio(bio); + } else if (r == DM_MAPIO_REMAPPED) + generic_make_request(bio); + } + blk_finish_plug(&plug); +} + /* * If we run out of usable paths, should we queue I/O or error it? 
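For __multipath_map_bio() and process_queued_bios() above, the return values follow the standard device-mapper mapping contract; a sketch of how dm core interprets them:

    /*
     * DM_MAPIO_SUBMITTED - the target queued or consumed the bio itself
     * DM_MAPIO_REMAPPED  - the caller submits the bio to the new bi_bdev
     * DM_MAPIO_REQUEUE   - dm core pushes the bio back and retries later
     * negative errno     - hard failure; the bio completes with that error
     */
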
*/ @@ -557,8 +736,10 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, spin_unlock_irqrestore(&m->lock, flags); - if (!queue_if_no_path) + if (!queue_if_no_path) { dm_table_run_md_queue_async(m->ti->table); + process_queued_bios_list(m); + } return 0; } @@ -798,6 +979,12 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) if (!hw_argc) return 0; + if (m->queue_mode == DM_TYPE_BIO_BASED) { + dm_consume_args(as, hw_argc); + DMERR("bio-based multipath doesn't allow hardware handler args"); + return 0; + } + m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); if (hw_argc > 1) { @@ -833,7 +1020,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) const char *arg_name; static struct dm_arg _args[] = { - {0, 6, "invalid number of feature args"}, + {0, 8, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, }; @@ -873,6 +1060,24 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) continue; } + if (!strcasecmp(arg_name, "queue_mode") && + (argc >= 1)) { + const char *queue_mode_name = dm_shift_arg(as); + + if (!strcasecmp(queue_mode_name, "bio")) + m->queue_mode = DM_TYPE_BIO_BASED; + else if (!strcasecmp(queue_mode_name, "rq")) + m->queue_mode = DM_TYPE_REQUEST_BASED; + else if (!strcasecmp(queue_mode_name, "mq")) + m->queue_mode = DM_TYPE_MQ_REQUEST_BASED; + else { + ti->error = "Unknown 'queue_mode' requested"; + r = -EINVAL; + } + argc--; + continue; + } + ti->error = "Unrecognised multipath feature request"; r = -EINVAL; } while (argc && !r); @@ -880,8 +1085,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) return r; } -static int multipath_ctr(struct dm_target *ti, unsigned int argc, - char **argv) +static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) { /* target arguments */ static struct dm_arg _args[] = { @@ -894,12 +1098,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, struct dm_arg_set as; unsigned pg_count = 0; unsigned next_pg_num; - bool use_blk_mq = dm_use_blk_mq(dm_table_get_md(ti->table)); as.argc = argc; as.argv = argv; - m = alloc_multipath(ti, use_blk_mq); + m = alloc_multipath(ti); if (!m) { ti->error = "can't allocate multipath"; return -EINVAL; @@ -909,6 +1112,10 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, if (r) goto bad; + r = alloc_multipath_stage2(ti, m); + if (r) + goto bad; + r = parse_hw_handler(&as, m); if (r) goto bad; @@ -958,7 +1165,9 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->num_write_same_bios = 1; - if (use_blk_mq) + if (m->queue_mode == DM_TYPE_BIO_BASED) + ti->per_io_data_size = multipath_per_bio_data_size(); + else if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED) ti->per_io_data_size = sizeof(struct dm_mpath_io); return 0; @@ -1083,8 +1292,10 @@ static int reinstate_path(struct pgpath *pgpath) out: spin_unlock_irqrestore(&m->lock, flags); - if (run_queue) + if (run_queue) { dm_table_run_md_queue_async(m->ti->table); + process_queued_bios_list(m); + } return r; } @@ -1281,6 +1492,8 @@ static void pg_init_done(void *data, int errors) } clear_bit(MPATHF_QUEUE_IO, &m->flags); + process_queued_bios_list(m); + /* * Wake up any thread waiting to suspend. 
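An illustrative table line exercising the queue_mode feature parsed above; device number, length and repeat_count are hypothetical:

    /*
     * 0 409600 multipath 2 queue_mode bio 0 1 1 round-robin 0 1 1 8:16 1
     *
     * "2 queue_mode bio"  - two feature words selecting bio-based mode
     * "0"                 - no hardware-handler arguments
     * "1 1"               - one priority group, start with group 1
     * "round-robin 0 1 1" - path selector, no selector args, one path,
     *                       one per-path arg
     * "8:16 1"            - the path device and its repeat_count
     */
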
*/ @@ -1328,7 +1541,7 @@ static int do_end_io(struct multipath *m, struct request *clone, * during end I/O handling, since those clone requests don't have * bio clones. If we queue them inside the multipath target, * we need to make bio clones, that requires memory allocation. - * (See drivers/md/dm.c:end_clone_bio() about why the clone requests + * (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests * don't have bio clones.) * Instead of queueing the clone request here, we queue the original * request into dm core, which will remake a clone request and @@ -1347,7 +1560,7 @@ static int do_end_io(struct multipath *m, struct request *clone, if (!atomic_read(&m->nr_valid_paths)) { if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { - if (!must_push_back(m)) + if (!must_push_back_rq(m)) r = -EIO; } else { if (error == -EBADE) @@ -1381,6 +1594,64 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, return r; } +static int do_end_io_bio(struct multipath *m, struct bio *clone, + int error, struct dm_mpath_io *mpio) +{ + unsigned long flags; + + if (!error) + return 0; /* I/O complete */ + + if (noretry_error(error)) + return error; + + if (mpio->pgpath) + fail_path(mpio->pgpath); + + if (!atomic_read(&m->nr_valid_paths)) { + if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + if (!must_push_back_bio(m)) + return -EIO; + return DM_ENDIO_REQUEUE; + } else { + if (error == -EBADE) + return error; + } + } + + /* Queue for the daemon to resubmit */ + dm_bio_restore(get_bio_details_from_bio(clone), clone); + + spin_lock_irqsave(&m->lock, flags); + bio_list_add(&m->queued_bios, clone); + spin_unlock_irqrestore(&m->lock, flags); + if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) + queue_work(kmultipathd, &m->process_queued_bios); + + return DM_ENDIO_INCOMPLETE; +} + +static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error) +{ + struct multipath *m = ti->private; + struct dm_mpath_io *mpio = get_mpio_from_bio(clone); + struct pgpath *pgpath; + struct path_selector *ps; + int r; + + BUG_ON(!mpio); + + r = do_end_io_bio(m, clone, error, mpio); + pgpath = mpio->pgpath; + if (pgpath) { + ps = &pgpath->pg->ps; + if (ps->type->end_io) + ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); + } + + return r; +} + /* * Suspend can't complete until all the I/O is processed so if * the last path fails we must error any remaining I/O. 
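do_end_io_bio() and multipath_end_io_bio() above follow the bio-based end_io contract; a sketch of how dm core acts on the return value:

    /*
     * 0                   - I/O complete, or the error was handled
     * DM_ENDIO_INCOMPLETE - the target still owns the bio (queued above
     *                       for the daemon to resubmit)
     * DM_ENDIO_REQUEUE    - dm core requeues the original bio
     * negative errno      - propagate the error to the submitter
     */
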
@@ -1454,7 +1725,9 @@ static void multipath_status(struct dm_target *ti, status_type_t type, DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) + (m->pg_init_retries > 0) * 2 + (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 + - test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)); + test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) + + (m->queue_mode != DM_TYPE_REQUEST_BASED) * 2); + if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) DMEMIT("queue_if_no_path "); if (m->pg_init_retries) @@ -1463,6 +1736,16 @@ static void multipath_status(struct dm_target *ti, status_type_t type, DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) DMEMIT("retain_attached_hw_handler "); + if (m->queue_mode != DM_TYPE_REQUEST_BASED) { + switch(m->queue_mode) { + case DM_TYPE_BIO_BASED: + DMEMIT("queue_mode bio "); + break; + case DM_TYPE_MQ_REQUEST_BASED: + DMEMIT("queue_mode mq "); + break; + } + } } if (!m->hw_handler_name || type == STATUSTYPE_INFO) @@ -1642,6 +1925,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti, if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) pg_init_all_paths(m); dm_table_run_md_queue_async(m->ti->table); + process_queued_bios_list(m); } /* @@ -1748,7 +2032,7 @@ static int multipath_busy(struct dm_target *ti) *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 11, 0}, + .version = {1, 12, 0}, .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE, .module = THIS_MODULE, .ctr = multipath_ctr, @@ -1757,6 +2041,8 @@ static struct target_type multipath_target = { .clone_and_map_rq = multipath_clone_and_map, .release_clone_rq = multipath_release_clone, .rq_end_io = multipath_end_io, + .map = multipath_map_bio, + .end_io = multipath_end_io_bio, .presuspend = multipath_presuspend, .postsuspend = multipath_postsuspend, .resume = multipath_resume, @@ -1771,14 +2057,14 @@ static int __init dm_multipath_init(void) { int r; - /* allocate a slab for the dm_ios */ + /* allocate a slab for the dm_mpath_ios */ _mpio_cache = KMEM_CACHE(dm_mpath_io, 0); if (!_mpio_cache) return -ENOMEM; r = dm_register_target(&multipath_target); if (r < 0) { - DMERR("register failed %d", r); + DMERR("request-based register failed %d", r); r = -EINVAL; goto bad_register_target; } @@ -1804,10 +2090,6 @@ static int __init dm_multipath_init(void) goto bad_alloc_kmpath_handlerd; } - DMINFO("version %u.%u.%u loaded", - multipath_target.version[0], multipath_target.version[1], - multipath_target.version[2]); - return 0; bad_alloc_kmpath_handlerd: diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 52532745a50f..84983549b5e1 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2010-2011 Neil Brown - * Copyright (C) 2010-2015 Red Hat, Inc. All rights reserved. + * Copyright (C) 2010-2016 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. */ @@ -17,7 +17,12 @@ #include #define DM_MSG_PREFIX "raid" -#define MAX_RAID_DEVICES 253 /* raid4/5/6 limit */ +#define MAX_RAID_DEVICES 253 /* md-raid kernel limit */ + +/* + * Minimum sectors of free reshape space per raid device + */ +#define MIN_FREE_RESHAPE_SPACE to_sector(4*4096) static bool devices_handle_discard_safely = false; @@ -25,12 +30,12 @@ static bool devices_handle_discard_safely = false; * The following flags are used by dm-raid.c to set up the array state. 
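In the multipath_status() hunk above, the leading <#features> field counts words, which is why a non-default queue_mode contributes "* 2" (keyword plus value). A worked example, assuming queue_if_no_path set on a bio-based map:

    /*
     * queue_if_no_path -> 1 word, queue_mode bio -> 2 words, so the
     * status line begins: "3 queue_if_no_path queue_mode bio ..."
     */
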
* They must be cleared before md_run is called. */ -#define FirstUse 10 /* rdev flag */ +#define FirstUse 10 /* rdev flag */ struct raid_dev { /* * Two DM devices, one to hold metadata and one to hold the - * actual data/parity. The reason for this is to not confuse + * actual data/parity. The reason for this is to not confuse * ti->len and give more flexibility in altering size and * characteristics. * @@ -45,26 +50,176 @@ struct raid_dev { struct md_rdev rdev; }; +/* + * Bits for establishing rs->ctr_flags + * + * 1 = no flag value + * 2 = flag with value + */ +#define __CTR_FLAG_SYNC 0 /* 1 */ /* Not with raid0! */ +#define __CTR_FLAG_NOSYNC 1 /* 1 */ /* Not with raid0! */ +#define __CTR_FLAG_REBUILD 2 /* 2 */ /* Not with raid0! */ +#define __CTR_FLAG_DAEMON_SLEEP 3 /* 2 */ /* Not with raid0! */ +#define __CTR_FLAG_MIN_RECOVERY_RATE 4 /* 2 */ /* Not with raid0! */ +#define __CTR_FLAG_MAX_RECOVERY_RATE 5 /* 2 */ /* Not with raid0! */ +#define __CTR_FLAG_MAX_WRITE_BEHIND 6 /* 2 */ /* Only with raid1! */ +#define __CTR_FLAG_WRITE_MOSTLY 7 /* 2 */ /* Only with raid1! */ +#define __CTR_FLAG_STRIPE_CACHE 8 /* 2 */ /* Only with raid4/5/6! */ +#define __CTR_FLAG_REGION_SIZE 9 /* 2 */ /* Not with raid0! */ +#define __CTR_FLAG_RAID10_COPIES 10 /* 2 */ /* Only with raid10 */ +#define __CTR_FLAG_RAID10_FORMAT 11 /* 2 */ /* Only with raid10 */ +/* New for v1.9.0 */ +#define __CTR_FLAG_DELTA_DISKS 12 /* 2 */ /* Only with reshapable raid1/4/5/6/10! */ +#define __CTR_FLAG_DATA_OFFSET 13 /* 2 */ /* Only with reshapable raid4/5/6/10! */ +#define __CTR_FLAG_RAID10_USE_NEAR_SETS 14 /* 2 */ /* Only with raid10! */ + /* * Flags for rs->ctr_flags field. */ -#define CTR_FLAG_SYNC 0x1 -#define CTR_FLAG_NOSYNC 0x2 -#define CTR_FLAG_REBUILD 0x4 -#define CTR_FLAG_DAEMON_SLEEP 0x8 -#define CTR_FLAG_MIN_RECOVERY_RATE 0x10 -#define CTR_FLAG_MAX_RECOVERY_RATE 0x20 -#define CTR_FLAG_MAX_WRITE_BEHIND 0x40 -#define CTR_FLAG_STRIPE_CACHE 0x80 -#define CTR_FLAG_REGION_SIZE 0x100 -#define CTR_FLAG_RAID10_COPIES 0x200 -#define CTR_FLAG_RAID10_FORMAT 0x400 +#define CTR_FLAG_SYNC (1 << __CTR_FLAG_SYNC) +#define CTR_FLAG_NOSYNC (1 << __CTR_FLAG_NOSYNC) +#define CTR_FLAG_REBUILD (1 << __CTR_FLAG_REBUILD) +#define CTR_FLAG_DAEMON_SLEEP (1 << __CTR_FLAG_DAEMON_SLEEP) +#define CTR_FLAG_MIN_RECOVERY_RATE (1 << __CTR_FLAG_MIN_RECOVERY_RATE) +#define CTR_FLAG_MAX_RECOVERY_RATE (1 << __CTR_FLAG_MAX_RECOVERY_RATE) +#define CTR_FLAG_MAX_WRITE_BEHIND (1 << __CTR_FLAG_MAX_WRITE_BEHIND) +#define CTR_FLAG_WRITE_MOSTLY (1 << __CTR_FLAG_WRITE_MOSTLY) +#define CTR_FLAG_STRIPE_CACHE (1 << __CTR_FLAG_STRIPE_CACHE) +#define CTR_FLAG_REGION_SIZE (1 << __CTR_FLAG_REGION_SIZE) +#define CTR_FLAG_RAID10_COPIES (1 << __CTR_FLAG_RAID10_COPIES) +#define CTR_FLAG_RAID10_FORMAT (1 << __CTR_FLAG_RAID10_FORMAT) +#define CTR_FLAG_DELTA_DISKS (1 << __CTR_FLAG_DELTA_DISKS) +#define CTR_FLAG_DATA_OFFSET (1 << __CTR_FLAG_DATA_OFFSET) +#define CTR_FLAG_RAID10_USE_NEAR_SETS (1 << __CTR_FLAG_RAID10_USE_NEAR_SETS) + +/* + * Definitions of various constructor flags to + * be used in checks of valid / invalid flags + * per raid level. + */ +/* Define all any sync flags */ +#define CTR_FLAGS_ANY_SYNC (CTR_FLAG_SYNC | CTR_FLAG_NOSYNC) + +/* Define flags for options without argument (e.g. 'nosync') */ +#define CTR_FLAG_OPTIONS_NO_ARGS (CTR_FLAGS_ANY_SYNC | \ + CTR_FLAG_RAID10_USE_NEAR_SETS) + +/* Define flags for options with one argument (e.g. 
'delta_disks +2') */ +#define CTR_FLAG_OPTIONS_ONE_ARG (CTR_FLAG_REBUILD | \ + CTR_FLAG_WRITE_MOSTLY | \ + CTR_FLAG_DAEMON_SLEEP | \ + CTR_FLAG_MIN_RECOVERY_RATE | \ + CTR_FLAG_MAX_RECOVERY_RATE | \ + CTR_FLAG_MAX_WRITE_BEHIND | \ + CTR_FLAG_STRIPE_CACHE | \ + CTR_FLAG_REGION_SIZE | \ + CTR_FLAG_RAID10_COPIES | \ + CTR_FLAG_RAID10_FORMAT | \ + CTR_FLAG_DELTA_DISKS | \ + CTR_FLAG_DATA_OFFSET) + +/* Valid options definitions per raid level... */ + +/* "raid0" does only accept data offset */ +#define RAID0_VALID_FLAGS (CTR_FLAG_DATA_OFFSET) + +/* "raid1" does not accept stripe cache, data offset, delta_disks or any raid10 options */ +#define RAID1_VALID_FLAGS (CTR_FLAGS_ANY_SYNC | \ + CTR_FLAG_REBUILD | \ + CTR_FLAG_WRITE_MOSTLY | \ + CTR_FLAG_DAEMON_SLEEP | \ + CTR_FLAG_MIN_RECOVERY_RATE | \ + CTR_FLAG_MAX_RECOVERY_RATE | \ + CTR_FLAG_MAX_WRITE_BEHIND | \ + CTR_FLAG_REGION_SIZE | \ + CTR_FLAG_DELTA_DISKS | \ + CTR_FLAG_DATA_OFFSET) + +/* "raid10" does not accept any raid1 or stripe cache options */ +#define RAID10_VALID_FLAGS (CTR_FLAGS_ANY_SYNC | \ + CTR_FLAG_REBUILD | \ + CTR_FLAG_DAEMON_SLEEP | \ + CTR_FLAG_MIN_RECOVERY_RATE | \ + CTR_FLAG_MAX_RECOVERY_RATE | \ + CTR_FLAG_REGION_SIZE | \ + CTR_FLAG_RAID10_COPIES | \ + CTR_FLAG_RAID10_FORMAT | \ + CTR_FLAG_DELTA_DISKS | \ + CTR_FLAG_DATA_OFFSET | \ + CTR_FLAG_RAID10_USE_NEAR_SETS) + +/* + * "raid4/5/6" do not accept any raid1 or raid10 specific options + * + * "raid6" does not accept "nosync", because it is not guaranteed + * that both parity and q-syndrome are being written properly with + * any writes + */ +#define RAID45_VALID_FLAGS (CTR_FLAGS_ANY_SYNC | \ + CTR_FLAG_REBUILD | \ + CTR_FLAG_DAEMON_SLEEP | \ + CTR_FLAG_MIN_RECOVERY_RATE | \ + CTR_FLAG_MAX_RECOVERY_RATE | \ + CTR_FLAG_MAX_WRITE_BEHIND | \ + CTR_FLAG_STRIPE_CACHE | \ + CTR_FLAG_REGION_SIZE | \ + CTR_FLAG_DELTA_DISKS | \ + CTR_FLAG_DATA_OFFSET) + +#define RAID6_VALID_FLAGS (CTR_FLAG_SYNC | \ + CTR_FLAG_REBUILD | \ + CTR_FLAG_DAEMON_SLEEP | \ + CTR_FLAG_MIN_RECOVERY_RATE | \ + CTR_FLAG_MAX_RECOVERY_RATE | \ + CTR_FLAG_MAX_WRITE_BEHIND | \ + CTR_FLAG_STRIPE_CACHE | \ + CTR_FLAG_REGION_SIZE | \ + CTR_FLAG_DELTA_DISKS | \ + CTR_FLAG_DATA_OFFSET) +/* ...valid options definitions per raid level */ + +/* + * Flags for rs->runtime_flags field + * (RT_FLAG prefix meaning "runtime flag") + * + * These are all internal and used to define runtime state, + * e.g. to prevent another resume from preresume processing + * the raid set all over again. 
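The dual definitions above, __CTR_FLAG_* bit indices alongside CTR_FLAG_* masks, let the rewritten parser mix atomic bitops with bulk mask checks; a minimal sketch, assuming the unsigned long ctr_flags field declared below:

    /* per-bit form, as used for duplicate-argument detection */
    bool nosync = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags);

    /* bulk mask form, as used by the per-level validity checks */
    bool any_sync = !!(rs->ctr_flags & CTR_FLAGS_ANY_SYNC);
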
+ */ +#define RT_FLAG_RS_PRERESUMED 0 +#define RT_FLAG_RS_RESUMED 1 +#define RT_FLAG_RS_BITMAP_LOADED 2 +#define RT_FLAG_UPDATE_SBS 3 +#define RT_FLAG_RESHAPE_RS 4 +#define RT_FLAG_KEEP_RS_FROZEN 5 + +/* Array elements of 64 bit needed for rebuild/failed disk bits */ +#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) + +/* + * raid set level, layout and chunk sectors backup/restore + */ +struct rs_layout { + int new_level; + int new_layout; + int new_chunk_sectors; +}; struct raid_set { struct dm_target *ti; uint32_t bitmap_loaded; - uint32_t ctr_flags; + uint32_t stripe_cache_entries; + unsigned long ctr_flags; + unsigned long runtime_flags; + + uint64_t rebuild_disks[DISKS_ARRAY_ELEMS]; + + int raid_disks; + int delta_disks; + int data_offset; + int raid10_copies; + int requested_bitmap_chunk_sectors; struct mddev md; struct raid_type *raid_type; @@ -73,82 +228,446 @@ struct raid_set { struct raid_dev dev[0]; }; +static void rs_config_backup(struct raid_set *rs, struct rs_layout *l) +{ + struct mddev *mddev = &rs->md; + + l->new_level = mddev->new_level; + l->new_layout = mddev->new_layout; + l->new_chunk_sectors = mddev->new_chunk_sectors; +} + +static void rs_config_restore(struct raid_set *rs, struct rs_layout *l) +{ + struct mddev *mddev = &rs->md; + + mddev->new_level = l->new_level; + mddev->new_layout = l->new_layout; + mddev->new_chunk_sectors = l->new_chunk_sectors; +} + +/* raid10 algorithms (i.e. formats) */ +#define ALGORITHM_RAID10_DEFAULT 0 +#define ALGORITHM_RAID10_NEAR 1 +#define ALGORITHM_RAID10_OFFSET 2 +#define ALGORITHM_RAID10_FAR 3 + /* Supported raid types and properties. */ static struct raid_type { const char *name; /* RAID algorithm. */ const char *descr; /* Descriptor text for logging. */ - const unsigned parity_devs; /* # of parity devices. */ - const unsigned minimal_devs; /* minimal # of devices in set. */ - const unsigned level; /* RAID level. */ - const unsigned algorithm; /* RAID algorithm. */ + const unsigned int parity_devs; /* # of parity devices. */ + const unsigned int minimal_devs;/* minimal # of devices in set. */ + const unsigned int level; /* RAID level. */ + const unsigned int algorithm; /* RAID algorithm. 
*/ } raid_types[] = { - {"raid0", "RAID0 (striping)", 0, 2, 0, 0 /* NONE */}, - {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, - {"raid10", "RAID10 (striped mirrors)", 0, 2, 10, UINT_MAX /* Varies */}, - {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, - {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, - {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, - {"raid5_ls", "RAID5 (left symmetric)", 1, 2, 5, ALGORITHM_LEFT_SYMMETRIC}, - {"raid5_rs", "RAID5 (right symmetric)", 1, 2, 5, ALGORITHM_RIGHT_SYMMETRIC}, - {"raid6_zr", "RAID6 (zero restart)", 2, 4, 6, ALGORITHM_ROTATING_ZERO_RESTART}, - {"raid6_nr", "RAID6 (N restart)", 2, 4, 6, ALGORITHM_ROTATING_N_RESTART}, - {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} + {"raid0", "raid0 (striping)", 0, 2, 0, 0 /* NONE */}, + {"raid1", "raid1 (mirroring)", 0, 2, 1, 0 /* NONE */}, + {"raid10_far", "raid10 far (striped mirrors)", 0, 2, 10, ALGORITHM_RAID10_FAR}, + {"raid10_offset", "raid10 offset (striped mirrors)", 0, 2, 10, ALGORITHM_RAID10_OFFSET}, + {"raid10_near", "raid10 near (striped mirrors)", 0, 2, 10, ALGORITHM_RAID10_NEAR}, + {"raid10", "raid10 (striped mirrors)", 0, 2, 10, ALGORITHM_RAID10_DEFAULT}, + {"raid4", "raid4 (dedicated last parity disk)", 1, 2, 4, ALGORITHM_PARITY_N}, /* raid4 layout = raid5_n */ + {"raid5_n", "raid5 (dedicated last parity disk)", 1, 2, 5, ALGORITHM_PARITY_N}, + {"raid5_ls", "raid5 (left symmetric)", 1, 2, 5, ALGORITHM_LEFT_SYMMETRIC}, + {"raid5_rs", "raid5 (right symmetric)", 1, 2, 5, ALGORITHM_RIGHT_SYMMETRIC}, + {"raid5_la", "raid5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, + {"raid5_ra", "raid5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, + {"raid6_zr", "raid6 (zero restart)", 2, 4, 6, ALGORITHM_ROTATING_ZERO_RESTART}, + {"raid6_nr", "raid6 (N restart)", 2, 4, 6, ALGORITHM_ROTATING_N_RESTART}, + {"raid6_nc", "raid6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE}, + {"raid6_n_6", "raid6 (dedicated parity/Q n/6)", 2, 4, 6, ALGORITHM_PARITY_N_6}, + {"raid6_ls_6", "raid6 (left symmetric dedicated Q 6)", 2, 4, 6, ALGORITHM_LEFT_SYMMETRIC_6}, + {"raid6_rs_6", "raid6 (right symmetric dedicated Q 6)", 2, 4, 6, ALGORITHM_RIGHT_SYMMETRIC_6}, + {"raid6_la_6", "raid6 (left asymmetric dedicated Q 6)", 2, 4, 6, ALGORITHM_LEFT_ASYMMETRIC_6}, + {"raid6_ra_6", "raid6 (right asymmetric dedicated Q 6)", 2, 4, 6, ALGORITHM_RIGHT_ASYMMETRIC_6} +}; + +/* True, if @v is in inclusive range [@min, @max] */ +static bool __within_range(long v, long min, long max) +{ + return v >= min && v <= max; +} + +/* All table line arguments are defined here */ +static struct arg_name_flag { + const unsigned long flag; + const char *name; +} __arg_name_flags[] = { + { CTR_FLAG_SYNC, "sync"}, + { CTR_FLAG_NOSYNC, "nosync"}, + { CTR_FLAG_REBUILD, "rebuild"}, + { CTR_FLAG_DAEMON_SLEEP, "daemon_sleep"}, + { CTR_FLAG_MIN_RECOVERY_RATE, "min_recovery_rate"}, + { CTR_FLAG_MAX_RECOVERY_RATE, "max_recovery_rate"}, + { CTR_FLAG_MAX_WRITE_BEHIND, "max_write_behind"}, + { CTR_FLAG_WRITE_MOSTLY, "write_mostly"}, + { CTR_FLAG_STRIPE_CACHE, "stripe_cache"}, + { CTR_FLAG_REGION_SIZE, "region_size"}, + { CTR_FLAG_RAID10_COPIES, "raid10_copies"}, + { CTR_FLAG_RAID10_FORMAT, "raid10_format"}, + { CTR_FLAG_DATA_OFFSET, "data_offset"}, + { CTR_FLAG_DELTA_DISKS, "delta_disks"}, + { CTR_FLAG_RAID10_USE_NEAR_SETS, "raid10_use_near_sets"}, }; -static char *raid10_md_layout_to_format(int layout) +/* Return argument name 
string for given @flag */ +static const char *dm_raid_arg_name_by_flag(const uint32_t flag) +{ + if (hweight32(flag) == 1) { + struct arg_name_flag *anf = __arg_name_flags + ARRAY_SIZE(__arg_name_flags); + + while (anf-- > __arg_name_flags) + if (flag & anf->flag) + return anf->name; + + } else + DMERR("%s called with more than one flag!", __func__); + + return NULL; +} + +/* + * Bool helpers to test for various raid levels of a raid set. + * It's level as reported by the superblock rather than + * the requested raid_type passed to the constructor. + */ +/* Return true, if raid set in @rs is raid0 */ +static bool rs_is_raid0(struct raid_set *rs) +{ + return !rs->md.level; +} + +/* Return true, if raid set in @rs is raid1 */ +static bool rs_is_raid1(struct raid_set *rs) +{ + return rs->md.level == 1; +} + +/* Return true, if raid set in @rs is raid10 */ +static bool rs_is_raid10(struct raid_set *rs) +{ + return rs->md.level == 10; +} + +/* Return true, if raid set in @rs is level 6 */ +static bool rs_is_raid6(struct raid_set *rs) +{ + return rs->md.level == 6; +} + +/* Return true, if raid set in @rs is level 4, 5 or 6 */ +static bool rs_is_raid456(struct raid_set *rs) +{ + return __within_range(rs->md.level, 4, 6); +} + +/* Return true, if raid set in @rs is reshapable */ +static bool __is_raid10_far(int layout); +static bool rs_is_reshapable(struct raid_set *rs) +{ + return rs_is_raid456(rs) || + (rs_is_raid10(rs) && !__is_raid10_far(rs->md.new_layout)); +} + +/* Return true, if raid set in @rs is recovering */ +static bool rs_is_recovering(struct raid_set *rs) +{ + return rs->md.recovery_cp < rs->dev[0].rdev.sectors; +} + +/* Return true, if raid set in @rs is reshaping */ +static bool rs_is_reshaping(struct raid_set *rs) +{ + return rs->md.reshape_position != MaxSector; +} + +/* + * bool helpers to test for various raid levels of a raid type @rt + */ + +/* Return true, if raid type in @rt is raid0 */ +static bool rt_is_raid0(struct raid_type *rt) +{ + return !rt->level; +} + +/* Return true, if raid type in @rt is raid1 */ +static bool rt_is_raid1(struct raid_type *rt) +{ + return rt->level == 1; +} + +/* Return true, if raid type in @rt is raid10 */ +static bool rt_is_raid10(struct raid_type *rt) +{ + return rt->level == 10; +} + +/* Return true, if raid type in @rt is raid4/5 */ +static bool rt_is_raid45(struct raid_type *rt) +{ + return __within_range(rt->level, 4, 5); +} + +/* Return true, if raid type in @rt is raid6 */ +static bool rt_is_raid6(struct raid_type *rt) +{ + return rt->level == 6; +} + +/* Return true, if raid type in @rt is raid4/5/6 */ +static bool rt_is_raid456(struct raid_type *rt) +{ + return __within_range(rt->level, 4, 6); +} +/* END: raid level bools */ + +/* Return valid ctr flags for the raid level of @rs */ +static unsigned long __valid_flags(struct raid_set *rs) +{ + if (rt_is_raid0(rs->raid_type)) + return RAID0_VALID_FLAGS; + else if (rt_is_raid1(rs->raid_type)) + return RAID1_VALID_FLAGS; + else if (rt_is_raid10(rs->raid_type)) + return RAID10_VALID_FLAGS; + else if (rt_is_raid45(rs->raid_type)) + return RAID45_VALID_FLAGS; + else if (rt_is_raid6(rs->raid_type)) + return RAID6_VALID_FLAGS; + + return 0; +} + +/* + * Check for valid flags set on @rs + * + * Has to be called after parsing of the ctr flags! 
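A usage sketch for dm_raid_arg_name_by_flag() defined above, which lets keyword comparisons and error messages share one table; the call shown is illustrative:

    const char *name = dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD);
    /* name == "rebuild"; a flag with more than one bit set would
     * log an error via DMERR() and yield NULL instead */
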
+ */ +static int rs_check_for_valid_flags(struct raid_set *rs) +{ + if (rs->ctr_flags & ~__valid_flags(rs)) { + rs->ti->error = "Invalid flags combination"; + return -EINVAL; + } + + return 0; +} + +/* MD raid10 bit definitions and helpers */ +#define RAID10_OFFSET (1 << 16) /* stripes with data copies area adjacent on devices */ +#define RAID10_BROCKEN_USE_FAR_SETS (1 << 17) /* Broken in raid10.c: use sets instead of whole stripe rotation */ +#define RAID10_USE_FAR_SETS (1 << 18) /* Use sets instead of whole stripe rotation */ +#define RAID10_FAR_COPIES_SHIFT 8 /* raid10 # far copies shift (2nd byte of layout) */ + +/* Return md raid10 near copies for @layout */ +static unsigned int __raid10_near_copies(int layout) +{ + return layout & 0xFF; +} + +/* Return md raid10 far copies for @layout */ +static unsigned int __raid10_far_copies(int layout) +{ + return __raid10_near_copies(layout >> RAID10_FAR_COPIES_SHIFT); +} + +/* Return true if md raid10 offset for @layout */ +static bool __is_raid10_offset(int layout) +{ + return !!(layout & RAID10_OFFSET); +} + +/* Return true if md raid10 near for @layout */ +static bool __is_raid10_near(int layout) +{ + return !__is_raid10_offset(layout) && __raid10_near_copies(layout) > 1; +} + +/* Return true if md raid10 far for @layout */ +static bool __is_raid10_far(int layout) +{ + return !__is_raid10_offset(layout) && __raid10_far_copies(layout) > 1; +} + +/* Return md raid10 layout string for @layout */ +static const char *raid10_md_layout_to_format(int layout) { /* - * Bit 16 and 17 stand for "offset" and "use_far_sets" + * Bit 16 stands for "offset" + * (i.e. adjacent stripes hold copies) + * * Refer to MD's raid10.c for details */ - if ((layout & 0x10000) && (layout & 0x20000)) + if (__is_raid10_offset(layout)) return "offset"; - if ((layout & 0xFF) > 1) + if (__raid10_near_copies(layout) > 1) return "near"; + WARN_ON(__raid10_far_copies(layout) < 2); + return "far"; } -static unsigned raid10_md_layout_to_copies(int layout) +/* Return md raid10 algorithm for @name */ +static int raid10_name_to_format(const char *name) { - if ((layout & 0xFF) > 1) - return layout & 0xFF; - return (layout >> 8) & 0xFF; + if (!strcasecmp(name, "near")) + return ALGORITHM_RAID10_NEAR; + else if (!strcasecmp(name, "offset")) + return ALGORITHM_RAID10_OFFSET; + else if (!strcasecmp(name, "far")) + return ALGORITHM_RAID10_FAR; + + return -EINVAL; +} + +/* Return md raid10 copies for @layout */ +static unsigned int raid10_md_layout_to_copies(int layout) +{ + return max(__raid10_near_copies(layout), __raid10_far_copies(layout)); } -static int raid10_format_to_md_layout(char *format, unsigned copies) +/* Return md raid10 format id for @format string */ +static int raid10_format_to_md_layout(struct raid_set *rs, + unsigned int algorithm, + unsigned int copies) { - unsigned n = 1, f = 1; + unsigned int n = 1, f = 1, r = 0; - if (!strcasecmp("near", format)) + /* + * MD resilienece flaw: + * + * enabling use_far_sets for far/offset formats causes copies + * to be colocated on the same devs together with their origins! 
+ * + * -> disable it for now in the definition above + */ + if (algorithm == ALGORITHM_RAID10_DEFAULT || + algorithm == ALGORITHM_RAID10_NEAR) n = copies; - else + + else if (algorithm == ALGORITHM_RAID10_OFFSET) { f = copies; + r = RAID10_OFFSET; + if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags)) + r |= RAID10_USE_FAR_SETS; - if (!strcasecmp("offset", format)) - return 0x30000 | (f << 8) | n; + } else if (algorithm == ALGORITHM_RAID10_FAR) { + f = copies; + r = !RAID10_OFFSET; + if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags)) + r |= RAID10_USE_FAR_SETS; - if (!strcasecmp("far", format)) - return 0x20000 | (f << 8) | n; + } else + return -EINVAL; - return (f << 8) | n; + return r | (f << RAID10_FAR_COPIES_SHIFT) | n; } +/* END: MD raid10 bit definitions and helpers */ -static struct raid_type *get_raid_type(char *name) +/* Check for any of the raid10 algorithms */ +static bool __got_raid10(struct raid_type *rtp, const int layout) { - int i; + if (rtp->level == 10) { + switch (rtp->algorithm) { + case ALGORITHM_RAID10_DEFAULT: + case ALGORITHM_RAID10_NEAR: + return __is_raid10_near(layout); + case ALGORITHM_RAID10_OFFSET: + return __is_raid10_offset(layout); + case ALGORITHM_RAID10_FAR: + return __is_raid10_far(layout); + default: + break; + } + } + + return false; +} + +/* Return raid_type for @name */ +static struct raid_type *get_raid_type(const char *name) +{ + struct raid_type *rtp = raid_types + ARRAY_SIZE(raid_types); + + while (rtp-- > raid_types) + if (!strcasecmp(rtp->name, name)) + return rtp; + + return NULL; +} + +/* Return raid_type for @name based derived from @level and @layout */ +static struct raid_type *get_raid_type_by_ll(const int level, const int layout) +{ + struct raid_type *rtp = raid_types + ARRAY_SIZE(raid_types); - for (i = 0; i < ARRAY_SIZE(raid_types); i++) - if (!strcmp(raid_types[i].name, name)) - return &raid_types[i]; + while (rtp-- > raid_types) { + /* RAID10 special checks based on @layout flags/properties */ + if (rtp->level == level && + (__got_raid10(rtp, layout) || rtp->algorithm == layout)) + return rtp; + } return NULL; } -static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *raid_type, unsigned raid_devs) +/* + * Conditionally change bdev capacity of @rs + * in case of a disk add/remove reshape + */ +static void rs_set_capacity(struct raid_set *rs) +{ + struct mddev *mddev = &rs->md; + struct md_rdev *rdev; + struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table)); + + /* + * raid10 sets rdev->sector to the device size, which + * is unintended in case of out-of-place reshaping + */ + rdev_for_each(rdev, mddev) + rdev->sectors = mddev->dev_sectors; + + set_capacity(gendisk, mddev->array_sectors); + revalidate_disk(gendisk); +} + +/* + * Set the mddev properties in @rs to the current + * ones retrieved from the freshest superblock + */ +static void rs_set_cur(struct raid_set *rs) +{ + struct mddev *mddev = &rs->md; + + mddev->new_level = mddev->level; + mddev->new_layout = mddev->layout; + mddev->new_chunk_sectors = mddev->chunk_sectors; +} + +/* + * Set the mddev properties in @rs to the new + * ones requested by the ctr + */ +static void rs_set_new(struct raid_set *rs) +{ + struct mddev *mddev = &rs->md; + + mddev->level = mddev->new_level; + mddev->layout = mddev->new_layout; + mddev->chunk_sectors = mddev->new_chunk_sectors; + mddev->raid_disks = rs->raid_disks; + mddev->delta_disks = 0; +} + +static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *raid_type, 
+ unsigned int raid_devs) { - unsigned i; + unsigned int i; struct raid_set *rs; if (raid_devs <= raid_type->parity_devs) { @@ -164,15 +683,19 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra mddev_init(&rs->md); + rs->raid_disks = raid_devs; + rs->delta_disks = 0; + rs->ti = ti; rs->raid_type = raid_type; + rs->stripe_cache_entries = 256; rs->md.raid_disks = raid_devs; rs->md.level = raid_type->level; rs->md.new_level = rs->md.level; rs->md.layout = raid_type->algorithm; rs->md.new_layout = rs->md.layout; rs->md.delta_disks = 0; - rs->md.recovery_cp = 0; + rs->md.recovery_cp = MaxSector; for (i = 0; i < raid_devs; i++) md_rdev_init(&rs->dev[i].rdev); @@ -189,11 +712,11 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra return rs; } -static void context_free(struct raid_set *rs) +static void raid_set_free(struct raid_set *rs) { int i; - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < rs->raid_disks; i++) { if (rs->dev[i].meta_dev) dm_put_device(rs->ti, rs->dev[i].meta_dev); md_rdev_clear(&rs->dev[i].rdev); @@ -218,16 +741,22 @@ static void context_free(struct raid_set *rs) * - * * This code parses those words. If there is a failure, - * the caller must use context_free to unwind the operations. + * the caller must use raid_set_free() to unwind the operations. */ -static int dev_parms(struct raid_set *rs, char **argv) +static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as) { int i; int rebuild = 0; int metadata_available = 0; - int ret = 0; + int r = 0; + const char *arg; - for (i = 0; i < rs->md.raid_disks; i++, argv += 2) { + /* Put off the number of raid devices argument to get to dev pairs */ + arg = dm_shift_arg(as); + if (!arg) + return -EINVAL; + + for (i = 0; i < rs->raid_disks; i++) { rs->dev[i].rdev.raid_disk = i; rs->dev[i].meta_dev = NULL; @@ -240,39 +769,49 @@ static int dev_parms(struct raid_set *rs, char **argv) rs->dev[i].rdev.data_offset = 0; rs->dev[i].rdev.mddev = &rs->md; - if (strcmp(argv[0], "-")) { - ret = dm_get_device(rs->ti, argv[0], - dm_table_get_mode(rs->ti->table), - &rs->dev[i].meta_dev); - rs->ti->error = "RAID metadata device lookup failure"; - if (ret) - return ret; + arg = dm_shift_arg(as); + if (!arg) + return -EINVAL; + + if (strcmp(arg, "-")) { + r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table), + &rs->dev[i].meta_dev); + if (r) { + rs->ti->error = "RAID metadata device lookup failure"; + return r; + } rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL); - if (!rs->dev[i].rdev.sb_page) + if (!rs->dev[i].rdev.sb_page) { + rs->ti->error = "Failed to allocate superblock page"; return -ENOMEM; + } } - if (!strcmp(argv[1], "-")) { + arg = dm_shift_arg(as); + if (!arg) + return -EINVAL; + + if (!strcmp(arg, "-")) { if (!test_bit(In_sync, &rs->dev[i].rdev.flags) && (!rs->dev[i].rdev.recovery_offset)) { rs->ti->error = "Drive designated for rebuild not specified"; return -EINVAL; } - rs->ti->error = "No data device supplied with metadata device"; - if (rs->dev[i].meta_dev) + if (rs->dev[i].meta_dev) { + rs->ti->error = "No data device supplied with metadata device"; return -EINVAL; + } continue; } - ret = dm_get_device(rs->ti, argv[1], - dm_table_get_mode(rs->ti->table), - &rs->dev[i].data_dev); - if (ret) { + r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table), + &rs->dev[i].data_dev); + if (r) { rs->ti->error = "RAID device lookup failure"; - return ret; + return r; } if (rs->dev[i].meta_dev) { @@ -280,7 +819,7 @@ static int 
dev_parms(struct raid_set *rs, char **argv) rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev; } rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev; - list_add(&rs->dev[i].rdev.same_set, &rs->md.disks); + list_add_tail(&rs->dev[i].rdev.same_set, &rs->md.disks); if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) rebuild++; } @@ -301,8 +840,7 @@ static int dev_parms(struct raid_set *rs, char **argv) * * User could specify 'nosync' option if desperate. */ - DMERR("Unable to rebuild drive while array is not in-sync"); - rs->ti->error = "RAID device lookup failure"; + rs->ti->error = "Unable to rebuild drive while array is not in-sync"; return -EINVAL; } @@ -325,7 +863,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) if (!region_size) { /* - * Choose a reasonable default. All figures in sectors. + * Choose a reasonable default. All figures in sectors. */ if (min_region_size > (1 << 13)) { /* If not a power of 2, make it the next power of 2 */ @@ -366,7 +904,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) /* * Convert sectors to bytes. */ - rs->md.bitmap_info.chunksize = (region_size << 9); + rs->md.bitmap_info.chunksize = to_bytes(region_size); return 0; } @@ -382,9 +920,9 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) */ static int validate_raid_redundancy(struct raid_set *rs) { - unsigned i, rebuild_cnt = 0; - unsigned rebuilds_per_group = 0, copies, d; - unsigned group_size, last_group_start; + unsigned int i, rebuild_cnt = 0; + unsigned int rebuilds_per_group = 0, copies; + unsigned int group_size, last_group_start; for (i = 0; i < rs->md.raid_disks; i++) if (!test_bit(In_sync, &rs->dev[i].rdev.flags) || @@ -403,7 +941,7 @@ static int validate_raid_redundancy(struct raid_set *rs) goto too_many; break; case 10: - copies = raid10_md_layout_to_copies(rs->md.layout); + copies = raid10_md_layout_to_copies(rs->md.new_layout); if (rebuild_cnt < copies) break; @@ -417,17 +955,16 @@ static int validate_raid_redundancy(struct raid_set *rs) * simple case where the number of devices is a multiple of the * number of copies, we must also handle cases where the number * of devices is not a multiple of the number of copies. - * E.g. dev1 dev2 dev3 dev4 dev5 - * A A B B C - * C D D E E + * E.g. dev1 dev2 dev3 dev4 dev5 + * A A B B C + * C D D E E */ - if (!strcmp("near", raid10_md_layout_to_format(rs->md.layout))) { - for (i = 0; i < rs->md.raid_disks * copies; i++) { + if (__is_raid10_near(rs->md.new_layout)) { + for (i = 0; i < rs->md.raid_disks; i++) { if (!(i % copies)) rebuilds_per_group = 0; - d = i % rs->md.raid_disks; - if ((!rs->dev[d].rdev.sb_page || - !test_bit(In_sync, &rs->dev[d].rdev.flags)) && + if ((!rs->dev[i].rdev.sb_page || + !test_bit(In_sync, &rs->dev[i].rdev.flags)) && (++rebuilds_per_group >= copies)) goto too_many; } @@ -442,7 +979,7 @@ static int validate_raid_redundancy(struct raid_set *rs) * use the 'use_far_sets' variant.) * * This check is somewhat complicated by the need to account - * for arrays that are not a multiple of (far) copies. This + * for arrays that are not a multiple of (far) copies. This * results in the need to treat the last (potentially larger) * set differently. 
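A worked example for the uneven-group case described above, assuming 5 devices with 2 copies and the group arithmetic of validate_raid_redundancy():

    /*
     * group_size       = 5 / 2           = 2
     * last_group_start = (5 / 2 - 1) * 2 = 2
     * groups: {dev0, dev1} plus the larger {dev2, dev3, dev4}
     */
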
*/ @@ -475,42 +1012,48 @@ too_many: * * Argument definitions * The number of sectors per disk that - * will form the "stripe" + * will form the "stripe" * [[no]sync] Force or prevent recovery of the - * entire array + * entire array * [rebuild ] Rebuild the drive indicated by the index * [daemon_sleep ] Time between bitmap daemon work to - * clear bits + * clear bits * [min_recovery_rate ] Throttle RAID initialization * [max_recovery_rate ] Throttle RAID initialization * [write_mostly ] Indicate a write mostly drive via index * [max_write_behind ] See '-write-behind=' (man mdadm) * [stripe_cache ] Stripe cache size for higher RAIDs - * [region_size ] Defines granularity of bitmap + * [region_size ] Defines granularity of bitmap * * RAID10-only options: - * [raid10_copies <# copies>] Number of copies. (Default: 2) + * [raid10_copies <# copies>] Number of copies. (Default: 2) * [raid10_format ] Layout algorithm. (Default: near) */ -static int parse_raid_params(struct raid_set *rs, char **argv, - unsigned num_raid_params) -{ - char *raid10_format = "near"; - unsigned raid10_copies = 2; - unsigned i; - unsigned long value, region_size = 0; - sector_t sectors_per_dev = rs->ti->len; +static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, + unsigned int num_raid_params) +{ + int value, raid10_format = ALGORITHM_RAID10_DEFAULT; + unsigned int raid10_copies = 2; + unsigned int i, write_mostly = 0; + unsigned int region_size = 0; sector_t max_io_len; - char *key; + const char *arg, *key; + struct raid_dev *rd; + struct raid_type *rt = rs->raid_type; + + arg = dm_shift_arg(as); + num_raid_params--; /* Account for chunk_size argument */ + + if (kstrtoint(arg, 10, &value) < 0) { + rs->ti->error = "Bad numerical argument given for chunk_size"; + return -EINVAL; + } /* * First, parse the in-order required arguments * "chunk_size" is the only argument of this type. */ - if ((kstrtoul(argv[0], 10, &value) < 0)) { - rs->ti->error = "Bad chunk size"; - return -EINVAL; - } else if (rs->raid_type->level == 1) { + if (rt_is_raid1(rt)) { if (value) DMERR("Ignoring chunk size parameter for RAID 1"); value = 0; @@ -523,8 +1066,6 @@ static int parse_raid_params(struct raid_set *rs, char **argv, } rs->md.new_chunk_sectors = rs->md.chunk_sectors = value; - argv++; - num_raid_params--; /* * We set each individual device as In_sync with a completed @@ -532,18 +1073,18 @@ static int parse_raid_params(struct raid_set *rs, char **argv, * replacement then one of the following cases applies: * * 1) User specifies 'rebuild'. - * - Device is reset when param is read. + * - Device is reset when param is read. * 2) A new device is supplied. - * - No matching superblock found, resets device. + * - No matching superblock found, resets device. * 3) Device failure was transient and returns on reload. - * - Failure noticed, resets device for bitmap replay. + * - Failure noticed, resets device for bitmap replay. * 4) Device hadn't completed recovery after previous failure. - * - Superblock is read and overrides recovery_offset. + * - Superblock is read and overrides recovery_offset. * * What is found in the superblocks of the devices is always * authoritative, unless 'rebuild' or '[no]sync' was specified. 
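An illustrative table line built from the constructor arguments documented above: a two-leg raid1 with no metadata devices and leg 1 marked for rebuild (device numbers hypothetical):

    /*
     * 0 2097152 raid raid1 3 0 rebuild 1 2 - 8:16 - 8:32
     *
     * "3 0 rebuild 1"   - three raid params: chunk_size 0 (ignored for
     *                     raid1) plus the "rebuild 1" key/value pair
     * "2 - 8:16 - 8:32" - two device pairs, "-" meaning no metadata dev
     */
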
*/ - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < rs->raid_disks; i++) { set_bit(In_sync, &rs->dev[i].rdev.flags); rs->dev[i].rdev.recovery_offset = MaxSector; } @@ -552,72 +1093,112 @@ static int parse_raid_params(struct raid_set *rs, char **argv, * Second, parse the unordered optional arguments */ for (i = 0; i < num_raid_params; i++) { - if (!strcasecmp(argv[i], "nosync")) { - rs->md.recovery_cp = MaxSector; - rs->ctr_flags |= CTR_FLAG_NOSYNC; + key = dm_shift_arg(as); + if (!key) { + rs->ti->error = "Not enough raid parameters given"; + return -EINVAL; + } + + if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC))) { + if (test_and_set_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) { + rs->ti->error = "Only one 'nosync' argument allowed"; + return -EINVAL; + } + continue; + } + if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_SYNC))) { + if (test_and_set_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) { + rs->ti->error = "Only one 'sync' argument allowed"; + return -EINVAL; + } continue; } - if (!strcasecmp(argv[i], "sync")) { - rs->md.recovery_cp = 0; - rs->ctr_flags |= CTR_FLAG_SYNC; + if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_USE_NEAR_SETS))) { + if (test_and_set_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags)) { + rs->ti->error = "Only one 'raid10_use_new_sets' argument allowed"; + return -EINVAL; + } continue; } - /* The rest of the optional arguments come in key/value pairs */ - if ((i + 1) >= num_raid_params) { + arg = dm_shift_arg(as); + i++; /* Account for the argument pairs */ + if (!arg) { rs->ti->error = "Wrong number of raid parameters given"; return -EINVAL; } - key = argv[i++]; + /* + * Parameters that take a string value are checked here. + */ - /* Parameters that take a string value are checked here. */ - if (!strcasecmp(key, "raid10_format")) { - if (rs->raid_type->level != 10) { + if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT))) { + if (test_and_set_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags)) { + rs->ti->error = "Only one 'raid10_format' argument pair allowed"; + return -EINVAL; + } + if (!rt_is_raid10(rt)) { rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type"; return -EINVAL; } - if (strcmp("near", argv[i]) && - strcmp("far", argv[i]) && - strcmp("offset", argv[i])) { + raid10_format = raid10_name_to_format(arg); + if (raid10_format < 0) { rs->ti->error = "Invalid 'raid10_format' value given"; - return -EINVAL; + return raid10_format; } - raid10_format = argv[i]; - rs->ctr_flags |= CTR_FLAG_RAID10_FORMAT; continue; } - if (kstrtoul(argv[i], 10, &value) < 0) { + if (kstrtoint(arg, 10, &value) < 0) { rs->ti->error = "Bad numerical argument given in raid params"; return -EINVAL; } - /* Parameters that take a numeric value are checked here */ - if (!strcasecmp(key, "rebuild")) { - if (value >= rs->md.raid_disks) { + if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD))) { + /* + * "rebuild" is being passed in by userspace to provide + * indexes of replaced devices and to set up additional + * devices on raid level takeover. 
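The rewritten loop above consumes the table through a dm_arg_set rather than indexing argv; the basic pattern, sketched:

    const char *key = dm_shift_arg(as); /* next word, NULL when empty */
    const char *arg = dm_shift_arg(as); /* its value, for key/value pairs */

    if (!key || !arg)
            return -EINVAL;             /* ran out of arguments */
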
+ */ + if (!__within_range(value, 0, rs->raid_disks - 1)) { rs->ti->error = "Invalid rebuild index given"; return -EINVAL; } - clear_bit(In_sync, &rs->dev[value].rdev.flags); - rs->dev[value].rdev.recovery_offset = 0; - rs->ctr_flags |= CTR_FLAG_REBUILD; - } else if (!strcasecmp(key, "write_mostly")) { - if (rs->raid_type->level != 1) { + + if (test_and_set_bit(value, (void *) rs->rebuild_disks)) { + rs->ti->error = "rebuild for this index already given"; + return -EINVAL; + } + + rd = rs->dev + value; + clear_bit(In_sync, &rd->rdev.flags); + clear_bit(Faulty, &rd->rdev.flags); + rd->rdev.recovery_offset = 0; + set_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags); + } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_WRITE_MOSTLY))) { + if (!rt_is_raid1(rt)) { rs->ti->error = "write_mostly option is only valid for RAID1"; return -EINVAL; } - if (value >= rs->md.raid_disks) { - rs->ti->error = "Invalid write_mostly drive index given"; + + if (!__within_range(value, 0, rs->md.raid_disks - 1)) { + rs->ti->error = "Invalid write_mostly index given"; return -EINVAL; } + + write_mostly++; set_bit(WriteMostly, &rs->dev[value].rdev.flags); - } else if (!strcasecmp(key, "max_write_behind")) { - if (rs->raid_type->level != 1) { + set_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags); + } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MAX_WRITE_BEHIND))) { + if (!rt_is_raid1(rt)) { rs->ti->error = "max_write_behind option is only valid for RAID1"; return -EINVAL; } - rs->ctr_flags |= CTR_FLAG_MAX_WRITE_BEHIND; + + if (test_and_set_bit(__CTR_FLAG_MAX_WRITE_BEHIND, &rs->ctr_flags)) { + rs->ti->error = "Only one max_write_behind argument pair allowed"; + return -EINVAL; + } /* * In device-mapper, we specify things in sectors, but @@ -628,65 +1209,122 @@ static int parse_raid_params(struct raid_set *rs, char **argv, rs->ti->error = "Max write-behind limit out of range"; return -EINVAL; } + rs->md.bitmap_info.max_write_behind = value; - } else if (!strcasecmp(key, "daemon_sleep")) { - rs->ctr_flags |= CTR_FLAG_DAEMON_SLEEP; + } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP))) { + if (test_and_set_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) { + rs->ti->error = "Only one daemon_sleep argument pair allowed"; + return -EINVAL; + } if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { rs->ti->error = "daemon sleep period out of range"; return -EINVAL; } rs->md.bitmap_info.daemon_sleep = value; - } else if (!strcasecmp(key, "stripe_cache")) { - rs->ctr_flags |= CTR_FLAG_STRIPE_CACHE; + } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET))) { + /* Userspace passes new data_offset after having extended the the data image LV */ + if (test_and_set_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) { + rs->ti->error = "Only one data_offset argument pair allowed"; + return -EINVAL; + } + /* Ensure sensible data offset */ + if (value < 0 || + (value && (value < MIN_FREE_RESHAPE_SPACE || value % to_sector(PAGE_SIZE)))) { + rs->ti->error = "Bogus data_offset value"; + return -EINVAL; + } + rs->data_offset = value; + } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DELTA_DISKS))) { + /* Define the +/-# of disks to add to/remove from the given raid set */ + if (test_and_set_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags)) { + rs->ti->error = "Only one delta_disks argument pair allowed"; + return -EINVAL; + } + /* Ensure MAX_RAID_DEVICES and raid type minimal_devs! 
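A worked example for the delta_disks bound check that follows, taking raid6 (minimal_devs = 4) as the raid type:

    /*
     * abs(value) must lie in [1, MAX_RAID_DEVICES - rt->minimal_devs]
     *                      = [1, 253 - 4] = [1, 249]
     */
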
*/ + if (!__within_range(abs(value), 1, MAX_RAID_DEVICES - rt->minimal_devs)) { + rs->ti->error = "Too many delta_disk requested"; + return -EINVAL; + } - /* - * In device-mapper, we specify things in sectors, but - * MD records this value in kB - */ - value /= 2; + rs->delta_disks = value; + } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_STRIPE_CACHE))) { + if (test_and_set_bit(__CTR_FLAG_STRIPE_CACHE, &rs->ctr_flags)) { + rs->ti->error = "Only one stripe_cache argument pair allowed"; + return -EINVAL; + } - if ((rs->raid_type->level != 5) && - (rs->raid_type->level != 6)) { + if (!rt_is_raid456(rt)) { rs->ti->error = "Inappropriate argument: stripe_cache"; return -EINVAL; } - if (raid5_set_cache_size(&rs->md, (int)value)) { - rs->ti->error = "Bad stripe_cache size"; + + rs->stripe_cache_entries = value; + } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE))) { + if (test_and_set_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) { + rs->ti->error = "Only one min_recovery_rate argument pair allowed"; return -EINVAL; } - } else if (!strcasecmp(key, "min_recovery_rate")) { - rs->ctr_flags |= CTR_FLAG_MIN_RECOVERY_RATE; if (value > INT_MAX) { rs->ti->error = "min_recovery_rate out of range"; return -EINVAL; } rs->md.sync_speed_min = (int)value; - } else if (!strcasecmp(key, "max_recovery_rate")) { - rs->ctr_flags |= CTR_FLAG_MAX_RECOVERY_RATE; + } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE))) { + if (test_and_set_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) { + rs->ti->error = "Only one max_recovery_rate argument pair allowed"; + return -EINVAL; + } if (value > INT_MAX) { rs->ti->error = "max_recovery_rate out of range"; return -EINVAL; } rs->md.sync_speed_max = (int)value; - } else if (!strcasecmp(key, "region_size")) { - rs->ctr_flags |= CTR_FLAG_REGION_SIZE; + } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE))) { + if (test_and_set_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags)) { + rs->ti->error = "Only one region_size argument pair allowed"; + return -EINVAL; + } + region_size = value; - } else if (!strcasecmp(key, "raid10_copies") && - (rs->raid_type->level == 10)) { - if ((value < 2) || (value > 0xFF)) { + rs->requested_bitmap_chunk_sectors = value; + } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_COPIES))) { + if (test_and_set_bit(__CTR_FLAG_RAID10_COPIES, &rs->ctr_flags)) { + rs->ti->error = "Only one raid10_copies argument pair allowed"; + return -EINVAL; + } + + if (!__within_range(value, 2, rs->md.raid_disks)) { rs->ti->error = "Bad value for 'raid10_copies'"; return -EINVAL; } - rs->ctr_flags |= CTR_FLAG_RAID10_COPIES; + raid10_copies = value; } else { DMERR("Unable to parse RAID parameter: %s", key); - rs->ti->error = "Unable to parse RAID parameters"; + rs->ti->error = "Unable to parse RAID parameter"; return -EINVAL; } } - if (validate_region_size(rs, region_size)) + if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) && + test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) { + rs->ti->error = "sync and nosync are mutually exclusive"; + return -EINVAL; + } + + if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) && + (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) || + test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))) { + rs->ti->error = "sync/nosync and rebuild are mutually exclusive"; + return -EINVAL; + } + + if (write_mostly >= rs->md.raid_disks) { + rs->ti->error = "Can't set all raid1 devices to write_mostly"; + return -EINVAL; + } + + if (validate_region_size(rs, 
region_size)) return -EINVAL; if (rs->md.chunk_sectors) @@ -697,47 +1335,193 @@ static int parse_raid_params(struct raid_set *rs, char **argv, if (dm_set_target_max_io_len(rs->ti, max_io_len)) return -EINVAL; - if (rs->raid_type->level == 10) { + if (rt_is_raid10(rt)) { if (raid10_copies > rs->md.raid_disks) { rs->ti->error = "Not enough devices to satisfy specification"; return -EINVAL; } - /* - * If the format is not "near", we only support - * two copies at the moment. - */ - if (strcmp("near", raid10_format) && (raid10_copies > 2)) { - rs->ti->error = "Too many copies for given RAID10 format."; + rs->md.new_layout = raid10_format_to_md_layout(rs, raid10_format, raid10_copies); + if (rs->md.new_layout < 0) { + rs->ti->error = "Error getting raid10 format"; + return rs->md.new_layout; + } + + rt = get_raid_type_by_ll(10, rs->md.new_layout); + if (!rt) { + rs->ti->error = "Failed to recognize new raid10 layout"; return -EINVAL; } - /* (Len * #mirrors) / #devices */ - sectors_per_dev = rs->ti->len * raid10_copies; - sector_div(sectors_per_dev, rs->md.raid_disks); - - rs->md.layout = raid10_format_to_md_layout(raid10_format, - raid10_copies); - rs->md.new_layout = rs->md.layout; - } else if ((!rs->raid_type->level || rs->raid_type->level > 1) && - sector_div(sectors_per_dev, - (rs->md.raid_disks - rs->raid_type->parity_devs))) { - rs->ti->error = "Target length not divisible by number of data devices"; - return -EINVAL; + if ((rt->algorithm == ALGORITHM_RAID10_DEFAULT || + rt->algorithm == ALGORITHM_RAID10_NEAR) && + test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags)) { + rs->ti->error = "RAID10 format 'near' and 'raid10_use_near_sets' are incompatible"; + return -EINVAL; + } } - rs->md.dev_sectors = sectors_per_dev; + + rs->raid10_copies = raid10_copies; /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; rs->md.external = 1; + /* Check, if any invalid ctr arguments have been passed in for the raid level */ + return rs_check_for_valid_flags(rs); +} + +/* Set raid4/5/6 cache size */ +static int rs_set_raid456_stripe_cache(struct raid_set *rs) +{ + int r; + struct r5conf *conf; + struct mddev *mddev = &rs->md; + uint32_t min_stripes = max(mddev->chunk_sectors, mddev->new_chunk_sectors) / 2; + uint32_t nr_stripes = rs->stripe_cache_entries; + + if (!rt_is_raid456(rs->raid_type)) { + rs->ti->error = "Inappropriate raid level; cannot change stripe_cache size"; + return -EINVAL; + } + + if (nr_stripes < min_stripes) { + DMINFO("Adjusting requested %u stripe cache entries to %u to suit stripe size", + nr_stripes, min_stripes); + nr_stripes = min_stripes; + } + + conf = mddev->private; + if (!conf) { + rs->ti->error = "Cannot change stripe_cache size on inactive RAID set"; + return -EINVAL; + } + + /* Try setting number of stripes in raid456 stripe cache */ + if (conf->min_nr_stripes != nr_stripes) { + r = raid5_set_cache_size(mddev, nr_stripes); + if (r) { + rs->ti->error = "Failed to set raid4/5/6 stripe cache size"; + return r; + } + + DMINFO("%u stripe cache entries", nr_stripes); + } + return 0; } +/* Return # of data stripes as kept in mddev as of @rs (i.e. as of superblock) */ +static unsigned int mddev_data_stripes(struct raid_set *rs) +{ + return rs->md.raid_disks - rs->raid_type->parity_devs; +} + +/* Return # of data stripes of @rs (i.e. 
as of ctr) */ +static unsigned int rs_data_stripes(struct raid_set *rs) +{ + return rs->raid_disks - rs->raid_type->parity_devs; +} + +/* Calculate the sectors per device and per array used for @rs */ +static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev) +{ + int delta_disks; + unsigned int data_stripes; + struct mddev *mddev = &rs->md; + struct md_rdev *rdev; + sector_t array_sectors = rs->ti->len, dev_sectors = rs->ti->len; + + if (use_mddev) { + delta_disks = mddev->delta_disks; + data_stripes = mddev_data_stripes(rs); + } else { + delta_disks = rs->delta_disks; + data_stripes = rs_data_stripes(rs); + } + + /* Special raid1 case w/o delta_disks support (yet) */ + if (rt_is_raid1(rs->raid_type)) + ; + else if (rt_is_raid10(rs->raid_type)) { + if (rs->raid10_copies < 2 || + delta_disks < 0) { + rs->ti->error = "Bogus raid10 data copies or delta disks"; + return -EINVAL; + } + + dev_sectors *= rs->raid10_copies; + if (sector_div(dev_sectors, data_stripes)) + goto bad; + + array_sectors = (data_stripes + delta_disks) * dev_sectors; + if (sector_div(array_sectors, rs->raid10_copies)) + goto bad; + + } else if (sector_div(dev_sectors, data_stripes)) + goto bad; + + else + /* Striped layouts */ + array_sectors = (data_stripes + delta_disks) * dev_sectors; + + rdev_for_each(rdev, mddev) + rdev->sectors = dev_sectors; + + mddev->array_sectors = array_sectors; + mddev->dev_sectors = dev_sectors; + + return 0; +bad: + rs->ti->error = "Target length not divisible by number of data devices"; + return -EINVAL; +} + +/* Setup recovery on @rs */ +static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors) +{ + /* raid0 does not recover */ + if (rs_is_raid0(rs)) + rs->md.recovery_cp = MaxSector; + /* + * A raid6 set has to be recovered either + * completely or for the grown part to + * ensure proper parity and Q-Syndrome + */ + else if (rs_is_raid6(rs)) + rs->md.recovery_cp = dev_sectors; + /* + * Other raid set types may skip recovery + * depending on the 'nosync' flag. + */ + else + rs->md.recovery_cp = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) + ? MaxSector : dev_sectors; +} + +/* Setup recovery on @rs based on raid type, device size and 'nosync' flag */ +static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors) +{ + if (!dev_sectors) + /* New raid set or 'sync' flag provided */ + __rs_setup_recovery(rs, 0); + else if (dev_sectors == MaxSector) + /* Prevent recovery */ + __rs_setup_recovery(rs, MaxSector); + else if (rs->dev[0].rdev.sectors < dev_sectors) + /* Grown raid set */ + __rs_setup_recovery(rs, rs->dev[0].rdev.sectors); + else + __rs_setup_recovery(rs, MaxSector); +} + static void do_table_event(struct work_struct *ws) { struct raid_set *rs = container_of(ws, struct raid_set, md.event_work); + smp_rmb(); /* Make sure we access most recent mddev properties */ + if (!rs_is_reshaping(rs)) + rs_set_capacity(rs); dm_table_event(rs->ti->table); } @@ -748,6 +1532,211 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) return mddev_congested(&rs->md, bits); } +/* + * Make sure a valid takeover (level switch) is being requested on @rs + * + * Conversions of raid sets from one MD personality to another + * have to conform to restrictions which are enforced here. 
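+ *
+ * For example (hypothetical device names and sizes), a takeover is requested
+ * by loading a new table line with a different raid_type against the same
+ * devices, e.g. raid1 to raid5 on a two-leg set, which the level switch
+ * below accepts:
+ *
+ *   dmsetup reload r0 --table \
+ *     "0 2097152 raid raid5_ls 1 64 2 /dev/meta0 /dev/data0 /dev/meta1 /dev/data1"
+ *   dmsetup resume r0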
+ */ +static int rs_check_takeover(struct raid_set *rs) +{ + struct mddev *mddev = &rs->md; + unsigned int near_copies; + + if (rs->md.degraded) { + rs->ti->error = "Can't takeover degraded raid set"; + return -EPERM; + } + + if (rs_is_reshaping(rs)) { + rs->ti->error = "Can't takeover reshaping raid set"; + return -EPERM; + } + + switch (mddev->level) { + case 0: + /* raid0 -> raid1/5 with one disk */ + if ((mddev->new_level == 1 || mddev->new_level == 5) && + mddev->raid_disks == 1) + return 0; + + /* raid0 -> raid10 */ + if (mddev->new_level == 10 && + !(rs->raid_disks % mddev->raid_disks)) + return 0; + + /* raid0 with multiple disks -> raid4/5/6 */ + if (__within_range(mddev->new_level, 4, 6) && + mddev->new_layout == ALGORITHM_PARITY_N && + mddev->raid_disks > 1) + return 0; + + break; + + case 10: + /* Can't takeover raid10_offset! */ + if (__is_raid10_offset(mddev->layout)) + break; + + near_copies = __raid10_near_copies(mddev->layout); + + /* raid10* -> raid0 */ + if (mddev->new_level == 0) { + /* Can takeover raid10_near with raid disks divisable by data copies! */ + if (near_copies > 1 && + !(mddev->raid_disks % near_copies)) { + mddev->raid_disks /= near_copies; + mddev->delta_disks = mddev->raid_disks; + return 0; + } + + /* Can takeover raid10_far */ + if (near_copies == 1 && + __raid10_far_copies(mddev->layout) > 1) + return 0; + + break; + } + + /* raid10_{near,far} -> raid1 */ + if (mddev->new_level == 1 && + max(near_copies, __raid10_far_copies(mddev->layout)) == mddev->raid_disks) + return 0; + + /* raid10_{near,far} with 2 disks -> raid4/5 */ + if (__within_range(mddev->new_level, 4, 5) && + mddev->raid_disks == 2) + return 0; + break; + + case 1: + /* raid1 with 2 disks -> raid4/5 */ + if (__within_range(mddev->new_level, 4, 5) && + mddev->raid_disks == 2) { + mddev->degraded = 1; + return 0; + } + + /* raid1 -> raid0 */ + if (mddev->new_level == 0 && + mddev->raid_disks == 1) + return 0; + + /* raid1 -> raid10 */ + if (mddev->new_level == 10) + return 0; + break; + + case 4: + /* raid4 -> raid0 */ + if (mddev->new_level == 0) + return 0; + + /* raid4 -> raid1/5 with 2 disks */ + if ((mddev->new_level == 1 || mddev->new_level == 5) && + mddev->raid_disks == 2) + return 0; + + /* raid4 -> raid5/6 with parity N */ + if (__within_range(mddev->new_level, 5, 6) && + mddev->layout == ALGORITHM_PARITY_N) + return 0; + break; + + case 5: + /* raid5 with parity N -> raid0 */ + if (mddev->new_level == 0 && + mddev->layout == ALGORITHM_PARITY_N) + return 0; + + /* raid5 with parity N -> raid4 */ + if (mddev->new_level == 4 && + mddev->layout == ALGORITHM_PARITY_N) + return 0; + + /* raid5 with 2 disks -> raid1/4/10 */ + if ((mddev->new_level == 1 || mddev->new_level == 4 || mddev->new_level == 10) && + mddev->raid_disks == 2) + return 0; + + /* raid5_* -> raid6_*_6 with Q-Syndrome N (e.g. 
raid5_ra -> raid6_ra_6 */ + if (mddev->new_level == 6 && + ((mddev->layout == ALGORITHM_PARITY_N && mddev->new_layout == ALGORITHM_PARITY_N) || + __within_range(mddev->new_layout, ALGORITHM_LEFT_ASYMMETRIC_6, ALGORITHM_RIGHT_SYMMETRIC_6))) + return 0; + break; + + case 6: + /* raid6 with parity N -> raid0 */ + if (mddev->new_level == 0 && + mddev->layout == ALGORITHM_PARITY_N) + return 0; + + /* raid6 with parity N -> raid4 */ + if (mddev->new_level == 4 && + mddev->layout == ALGORITHM_PARITY_N) + return 0; + + /* raid6_*_n with Q-Syndrome N -> raid5_* */ + if (mddev->new_level == 5 && + ((mddev->layout == ALGORITHM_PARITY_N && mddev->new_layout == ALGORITHM_PARITY_N) || + __within_range(mddev->new_layout, ALGORITHM_LEFT_ASYMMETRIC, ALGORITHM_RIGHT_SYMMETRIC))) + return 0; + + default: + break; + } + + rs->ti->error = "takeover not possible"; + return -EINVAL; +} + +/* True if @rs requested to be taken over */ +static bool rs_takeover_requested(struct raid_set *rs) +{ + return rs->md.new_level != rs->md.level; +} + +/* True if @rs is requested to reshape by ctr */ +static bool rs_reshape_requested(struct raid_set *rs) +{ + bool change; + struct mddev *mddev = &rs->md; + + if (rs_takeover_requested(rs)) + return false; + + if (!mddev->level) + return false; + + change = mddev->new_layout != mddev->layout || + mddev->new_chunk_sectors != mddev->chunk_sectors || + rs->delta_disks; + + /* Historical case to support raid1 reshape without delta disks */ + if (mddev->level == 1) { + if (rs->delta_disks) + return !!rs->delta_disks; + + return !change && + mddev->raid_disks != rs->raid_disks; + } + + if (mddev->level == 10) + return change && + !__is_raid10_far(mddev->new_layout) && + rs->delta_disks >= 0; + + return change; +} + +/* Features */ +#define FEATURE_FLAG_SUPPORTS_V190 0x1 /* Supports extended superblock */ + +/* State flags for sb->flags */ +#define SB_FLAG_RESHAPE_ACTIVE 0x1 +#define SB_FLAG_RESHAPE_BACKWARDS 0x2 + /* * This structure is never routinely used by userspace, unlike md superblocks. * Devices with this superblock should only ever be accessed via device-mapper. @@ -755,13 +1744,14 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) #define DM_RAID_MAGIC 0x64526D44 struct dm_raid_superblock { __le32 magic; /* "DmRd" */ - __le32 features; /* Used to indicate possible future changes */ + __le32 compat_features; /* Used to indicate compatible features (like 1.9.0 ondisk metadata extension) */ - __le32 num_devices; /* Number of devices in this array. (Max 64) */ - __le32 array_position; /* The position of this drive in the array */ + __le32 num_devices; /* Number of devices in this raid set. (Max 64) */ + __le32 array_position; /* The position of this drive in the raid set */ __le64 events; /* Incremented by md when superblock updated */ - __le64 failed_devices; /* Bit field of devices to indicate failures */ + __le64 failed_devices; /* Pre 1.9.0 part of bit field of devices to */ + /* indicate failures (see extension below) */ /* * This offset tracks the progress of the repair or replacement of @@ -770,21 +1760,95 @@ struct dm_raid_superblock { __le64 disk_recovery_offset; /* - * This offset tracks the progress of the initial array + * This offset tracks the progress of the initial raid set * synchronisation/parity calculation. */ __le64 array_resync_offset; /* - * RAID characteristics + * raid characteristics */ __le32 level; __le32 layout; __le32 stripe_sectors; - /* Remainder of a logical block is zero-filled when writing (see super_sync()). 
*/ + /******************************************************************** + * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!! + * + * FEATURE_FLAG_SUPPORTS_V190 in the features member indicates that those exist + */ + + __le32 flags; /* Flags defining array states for reshaping */ + + /* + * This offset tracks the progress of a raid + * set reshape in order to be able to restart it + */ + __le64 reshape_position; + + /* + * These define the properties of the array in case of an interrupted reshape + */ + __le32 new_level; + __le32 new_layout; + __le32 new_stripe_sectors; + __le32 delta_disks; + + __le64 array_sectors; /* Array size in sectors */ + + /* + * Sector offsets to data on devices (reshaping). + * Needed to support out of place reshaping, thus + * not writing over any stripes whilst converting + * them from old to new layout + */ + __le64 data_offset; + __le64 new_data_offset; + + __le64 sectors; /* Used device size in sectors */ + + /* + * Additonal Bit field of devices indicating failures to support + * up to 256 devices with the 1.9.0 on-disk metadata format + */ + __le64 extended_failed_devices[DISKS_ARRAY_ELEMS - 1]; + + __le32 incompat_features; /* Used to indicate any incompatible features */ + + /* Always set rest up to logical block size to 0 when writing (see get_metadata_device() below). */ } __packed; +/* + * Check for reshape constraints on raid set @rs: + * + * - reshape function non-existent + * - degraded set + * - ongoing recovery + * - ongoing reshape + * + * Returns 0 if none or -EPERM if given constraint + * and error message reference in @errmsg + */ +static int rs_check_reshape(struct raid_set *rs) +{ + struct mddev *mddev = &rs->md; + + if (!mddev->pers || !mddev->pers->check_reshape) + rs->ti->error = "Reshape not supported"; + else if (mddev->degraded) + rs->ti->error = "Can't reshape degraded raid set"; + else if (rs_is_recovering(rs)) + rs->ti->error = "Convert request on recovering raid set prohibited"; + else if (rs_is_reshaping(rs)) + rs->ti->error = "raid set already reshaping!"; + else if (!(rs_is_raid1(rs) || rs_is_raid10(rs) || rs_is_raid456(rs))) + rs->ti->error = "Reshaping only supported for raid1/4/5/6/10"; + else + return 0; + + return -EPERM; +} + static int read_disk_sb(struct md_rdev *rdev, int size) { BUG_ON(!rdev->sb_page); @@ -792,7 +1856,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size) if (rdev->sb_loaded) return 0; - if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) { + if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true)) { DMERR("Failed to read superblock of device at position %d", rdev->raid_disk); md_error(rdev->mddev, rdev); @@ -804,31 +1868,67 @@ static int read_disk_sb(struct md_rdev *rdev, int size) return 0; } +static void sb_retrieve_failed_devices(struct dm_raid_superblock *sb, uint64_t *failed_devices) +{ + failed_devices[0] = le64_to_cpu(sb->failed_devices); + memset(failed_devices + 1, 0, sizeof(sb->extended_failed_devices)); + + if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) { + int i = ARRAY_SIZE(sb->extended_failed_devices); + + while (i--) + failed_devices[i+1] = le64_to_cpu(sb->extended_failed_devices[i]); + } +} + +static void sb_update_failed_devices(struct dm_raid_superblock *sb, uint64_t *failed_devices) +{ + int i = ARRAY_SIZE(sb->extended_failed_devices); + + sb->failed_devices = cpu_to_le64(failed_devices[0]); + while (i--) + sb->extended_failed_devices[i] = cpu_to_le64(failed_devices[i+1]); +} + +/* + * Synchronize the superblock 
members with the raid set properties + * + * All superblock data is little endian. + */ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) { - int i; - uint64_t failed_devices; + bool update_failed_devices = false; + unsigned int i; + uint64_t failed_devices[DISKS_ARRAY_ELEMS]; struct dm_raid_superblock *sb; struct raid_set *rs = container_of(mddev, struct raid_set, md); + /* No metadata device, no superblock */ + if (!rdev->meta_bdev) + return; + + BUG_ON(!rdev->sb_page); + sb = page_address(rdev->sb_page); - failed_devices = le64_to_cpu(sb->failed_devices); - for (i = 0; i < mddev->raid_disks; i++) - if (!rs->dev[i].data_dev || - test_bit(Faulty, &(rs->dev[i].rdev.flags))) - failed_devices |= (1ULL << i); + sb_retrieve_failed_devices(sb, failed_devices); - memset(sb + 1, 0, rdev->sb_size - sizeof(*sb)); + for (i = 0; i < rs->raid_disks; i++) + if (!rs->dev[i].data_dev || test_bit(Faulty, &rs->dev[i].rdev.flags)) { + update_failed_devices = true; + set_bit(i, (void *) failed_devices); + } + + if (update_failed_devices) + sb_update_failed_devices(sb, failed_devices); sb->magic = cpu_to_le32(DM_RAID_MAGIC); - sb->features = cpu_to_le32(0); /* No features yet */ + sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V190); sb->num_devices = cpu_to_le32(mddev->raid_disks); sb->array_position = cpu_to_le32(rdev->raid_disk); sb->events = cpu_to_le64(mddev->events); - sb->failed_devices = cpu_to_le64(failed_devices); sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset); sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp); @@ -836,6 +1936,33 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors); + + sb->new_level = cpu_to_le32(mddev->new_level); + sb->new_layout = cpu_to_le32(mddev->new_layout); + sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors); + + sb->delta_disks = cpu_to_le32(mddev->delta_disks); + + smp_rmb(); /* Make sure we access most recent reshape position */ + sb->reshape_position = cpu_to_le64(mddev->reshape_position); + if (le64_to_cpu(sb->reshape_position) != MaxSector) { + /* Flag ongoing reshape */ + sb->flags |= cpu_to_le32(SB_FLAG_RESHAPE_ACTIVE); + + if (mddev->delta_disks < 0 || mddev->reshape_backwards) + sb->flags |= cpu_to_le32(SB_FLAG_RESHAPE_BACKWARDS); + } else { + /* Clear reshape flags */ + sb->flags &= ~(cpu_to_le32(SB_FLAG_RESHAPE_ACTIVE|SB_FLAG_RESHAPE_BACKWARDS)); + } + + sb->array_sectors = cpu_to_le64(mddev->array_sectors); + sb->data_offset = cpu_to_le64(rdev->data_offset); + sb->new_data_offset = cpu_to_le64(rdev->new_data_offset); + sb->sectors = cpu_to_le64(rdev->sectors); + + /* Zero out the rest of the payload after the size of the superblock */ + memset(sb + 1, 0, rdev->sb_size - sizeof(*sb)); } /* @@ -848,7 +1975,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) */ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) { - int ret; + int r; struct dm_raid_superblock *sb; struct dm_raid_superblock *refsb; uint64_t events_sb, events_refsb; @@ -860,9 +1987,9 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) return -EINVAL; } - ret = read_disk_sb(rdev, rdev->sb_size); - if (ret) - return ret; + r = read_disk_sb(rdev, rdev->sb_size); + if (r) + return r; sb = page_address(rdev->sb_page); @@ -876,6 +2003,7 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) super_sync(rdev->mddev, rdev); 
set_bit(FirstUse, &rdev->flags); + sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V190); /* Force writing of superblocks to disk */ set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags); @@ -895,129 +2023,212 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) return (events_sb > events_refsb) ? 1 : 0; } -static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) +static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) { int role; - struct raid_set *rs = container_of(mddev, struct raid_set, md); + unsigned int d; + struct mddev *mddev = &rs->md; uint64_t events_sb; - uint64_t failed_devices; + uint64_t failed_devices[DISKS_ARRAY_ELEMS]; struct dm_raid_superblock *sb; - uint32_t new_devs = 0; - uint32_t rebuilds = 0; + uint32_t new_devs = 0, rebuild_and_new = 0, rebuilds = 0; struct md_rdev *r; struct dm_raid_superblock *sb2; sb = page_address(rdev->sb_page); events_sb = le64_to_cpu(sb->events); - failed_devices = le64_to_cpu(sb->failed_devices); /* * Initialise to 1 if this is a new superblock. */ mddev->events = events_sb ? : 1; + mddev->reshape_position = MaxSector; + /* - * Reshaping is not currently allowed + * Reshaping is supported, e.g. reshape_position is valid + * in superblock and superblock content is authoritative. */ - if (le32_to_cpu(sb->level) != mddev->level) { - DMERR("Reshaping arrays not yet supported. (RAID level change)"); - return -EINVAL; - } - if (le32_to_cpu(sb->layout) != mddev->layout) { - DMERR("Reshaping arrays not yet supported. (RAID layout change)"); - DMERR(" 0x%X vs 0x%X", le32_to_cpu(sb->layout), mddev->layout); - DMERR(" Old layout: %s w/ %d copies", - raid10_md_layout_to_format(le32_to_cpu(sb->layout)), - raid10_md_layout_to_copies(le32_to_cpu(sb->layout))); - DMERR(" New layout: %s w/ %d copies", - raid10_md_layout_to_format(mddev->layout), - raid10_md_layout_to_copies(mddev->layout)); - return -EINVAL; - } - if (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors) { - DMERR("Reshaping arrays not yet supported. (stripe sectors change)"); - return -EINVAL; - } + if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) { + /* Superblock is authoritative wrt given raid set layout! */ + mddev->raid_disks = le32_to_cpu(sb->num_devices); + mddev->level = le32_to_cpu(sb->level); + mddev->layout = le32_to_cpu(sb->layout); + mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors); + mddev->new_level = le32_to_cpu(sb->new_level); + mddev->new_layout = le32_to_cpu(sb->new_layout); + mddev->new_chunk_sectors = le32_to_cpu(sb->new_stripe_sectors); + mddev->delta_disks = le32_to_cpu(sb->delta_disks); + mddev->array_sectors = le64_to_cpu(sb->array_sectors); + + /* raid was reshaping and got interrupted */ + if (le32_to_cpu(sb->flags) & SB_FLAG_RESHAPE_ACTIVE) { + if (test_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags)) { + DMERR("Reshape requested but raid set is still reshaping"); + return -EINVAL; + } - /* We can only change the number of devices in RAID1 right now */ - if ((rs->raid_type->level != 1) && - (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { - DMERR("Reshaping arrays not yet supported. 
(device count change)"); - return -EINVAL; + if (mddev->delta_disks < 0 || + (!mddev->delta_disks && (le32_to_cpu(sb->flags) & SB_FLAG_RESHAPE_BACKWARDS))) + mddev->reshape_backwards = 1; + else + mddev->reshape_backwards = 0; + + mddev->reshape_position = le64_to_cpu(sb->reshape_position); + rs->raid_type = get_raid_type_by_ll(mddev->level, mddev->layout); + } + + } else { + /* + * No takeover/reshaping, because we don't have the extended v1.9.0 metadata + */ + if (le32_to_cpu(sb->level) != mddev->level) { + DMERR("Reshaping/takeover raid sets not yet supported. (raid level/stripes/size change)"); + return -EINVAL; + } + if (le32_to_cpu(sb->layout) != mddev->layout) { + DMERR("Reshaping raid sets not yet supported. (raid layout change)"); + DMERR(" 0x%X vs 0x%X", le32_to_cpu(sb->layout), mddev->layout); + DMERR(" Old layout: %s w/ %d copies", + raid10_md_layout_to_format(le32_to_cpu(sb->layout)), + raid10_md_layout_to_copies(le32_to_cpu(sb->layout))); + DMERR(" New layout: %s w/ %d copies", + raid10_md_layout_to_format(mddev->layout), + raid10_md_layout_to_copies(mddev->layout)); + return -EINVAL; + } + if (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors) { + DMERR("Reshaping raid sets not yet supported. (stripe sectors change)"); + return -EINVAL; + } + + /* We can only change the number of devices in raid1 with old (i.e. pre 1.0.7) metadata */ + if (!rt_is_raid1(rs->raid_type) && + (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { + DMERR("Reshaping raid sets not yet supported. (device count change from %u to %u)", + sb->num_devices, mddev->raid_disks); + return -EINVAL; + } + + /* Table line is checked vs. authoritative superblock */ + rs_set_new(rs); } - if (!(rs->ctr_flags & (CTR_FLAG_SYNC | CTR_FLAG_NOSYNC))) + if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset); /* * During load, we set FirstUse if a new superblock was written. * There are two reasons we might not have a superblock: - * 1) The array is brand new - in which case, all of the - * devices must have their In_sync bit set. Also, + * 1) The raid set is brand new - in which case, all of the + * devices must have their In_sync bit set. Also, * recovery_cp must be 0, unless forced. - * 2) This is a new device being added to an old array + * 2) This is a new device being added to an old raid set * and the new device needs to be rebuilt - in which * case the In_sync bit will /not/ be set and * recovery_cp must be MaxSector. + * 3) This is/are a new device(s) being added to an old + * raid set during takeover to a higher raid level + * to provide capacity for redundancy or during reshape + * to add capacity to grow the raid set. 
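+ *
+ * For case 2, a hypothetical table line (made-up device names, not from
+ * this patch) would carry the 'rebuild' key, e.g. rebuilding leg #2 of a
+ * three-way raid1:
+ *
+ *   0 2097152 raid raid1 3 0 rebuild 2 3 /dev/m0 /dev/d0 /dev/m1 /dev/d1 /dev/m2 /dev/d2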
*/ + d = 0; rdev_for_each(r, mddev) { + if (test_bit(FirstUse, &r->flags)) + new_devs++; + if (!test_bit(In_sync, &r->flags)) { - DMINFO("Device %d specified for rebuild: " - "Clearing superblock", r->raid_disk); + DMINFO("Device %d specified for rebuild; clearing superblock", + r->raid_disk); rebuilds++; - } else if (test_bit(FirstUse, &r->flags)) - new_devs++; + + if (test_bit(FirstUse, &r->flags)) + rebuild_and_new++; + } + + d++; } - if (!rebuilds) { - if (new_devs == mddev->raid_disks) { - DMINFO("Superblocks created for new array"); + if (new_devs == rs->raid_disks || !rebuilds) { + /* Replace a broken device */ + if (new_devs == 1 && !rs->delta_disks) + ; + if (new_devs == rs->raid_disks) { + DMINFO("Superblocks created for new raid set"); set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); - } else if (new_devs) { - DMERR("New device injected " - "into existing array without 'rebuild' " - "parameter specified"); + } else if (new_devs != rebuilds && + new_devs != rs->delta_disks) { + DMERR("New device injected into existing raid set without " + "'delta_disks' or 'rebuild' parameter specified"); return -EINVAL; } - } else if (new_devs) { - DMERR("'rebuild' devices cannot be " - "injected into an array with other first-time devices"); - return -EINVAL; - } else if (mddev->recovery_cp != MaxSector) { - DMERR("'rebuild' specified while array is not in-sync"); + } else if (new_devs && new_devs != rebuilds) { + DMERR("%u 'rebuild' devices cannot be injected into" + " a raid set with %u other first-time devices", + rebuilds, new_devs); return -EINVAL; + } else if (rebuilds) { + if (rebuild_and_new && rebuilds != rebuild_and_new) { + DMERR("new device%s provided without 'rebuild'", + new_devs > 1 ? "s" : ""); + return -EINVAL; + } else if (rs_is_recovering(rs)) { + DMERR("'rebuild' specified while raid set is not in-sync (recovery_cp=%llu)", + (unsigned long long) mddev->recovery_cp); + return -EINVAL; + } else if (rs_is_reshaping(rs)) { + DMERR("'rebuild' specified while raid set is being reshaped (reshape_position=%llu)", + (unsigned long long) mddev->reshape_position); + return -EINVAL; + } } /* * Now we set the Faulty bit for those devices that are * recorded in the superblock as failed. */ + sb_retrieve_failed_devices(sb, failed_devices); rdev_for_each(r, mddev) { if (!r->sb_page) continue; sb2 = page_address(r->sb_page); sb2->failed_devices = 0; + memset(sb2->extended_failed_devices, 0, sizeof(sb2->extended_failed_devices)); /* * Check for any device re-ordering. */ if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) { role = le32_to_cpu(sb2->array_position); + if (role < 0) + continue; + if (role != r->raid_disk) { - if (rs->raid_type->level != 1) { - rs->ti->error = "Cannot change device " - "positions in RAID array"; + if (__is_raid10_near(mddev->layout)) { + if (mddev->raid_disks % __raid10_near_copies(mddev->layout) || + rs->raid_disks % rs->raid10_copies) { + rs->ti->error = + "Cannot change raid10 near set to odd # of devices!"; + return -EINVAL; + } + + sb2->array_position = cpu_to_le32(r->raid_disk); + + } else if (!(rs_is_raid10(rs) && rt_is_raid0(rs->raid_type)) && + !(rs_is_raid0(rs) && rt_is_raid10(rs->raid_type)) && + !rt_is_raid1(rs->raid_type)) { + rs->ti->error = "Cannot change device positions in raid set"; return -EINVAL; } - DMINFO("RAID1 device #%d now at position #%d", - role, r->raid_disk); + + DMINFO("raid device #%d now at position #%d", role, r->raid_disk); } /* * Partial recovery is performed on * returning failed devices. 
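 */

The Faulty marking below indexes device roles into the combined failure map: sb_retrieve_failed_devices() merges the legacy 64-bit failed_devices word with the v1.9.0 extended_failed_devices[] array into one flat bitmap covering up to 256 devices. A minimal stand-alone sketch of that addressing (userspace C; dev_failed() is an invented name for illustration, assuming the same 4 x 64-bit layout behind DISKS_ARRAY_ELEMS):

	#include <stdint.h>
	#include <stdbool.h>

	#define DISKS_ARRAY_ELEMS 4	/* 4 x 64 bits covers the 256-device limit */

	/* role-indexed lookup over the combined failure bitmap */
	static bool dev_failed(const uint64_t failed_devices[DISKS_ARRAY_ELEMS],
			       unsigned int role)
	{
		return failed_devices[role / 64] & (1ULL << (role % 64));
	}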
*/ - if (failed_devices & (1 << role)) + if (test_bit(role, (void *) failed_devices)) set_bit(Faulty, &r->flags); } } @@ -1028,41 +2239,60 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) static int super_validate(struct raid_set *rs, struct md_rdev *rdev) { struct mddev *mddev = &rs->md; - struct dm_raid_superblock *sb = page_address(rdev->sb_page); + struct dm_raid_superblock *sb; + + if (rs_is_raid0(rs) || !rdev->sb_page) + return 0; + + sb = page_address(rdev->sb_page); /* * If mddev->events is not set, we know we have not yet initialized * the array. */ - if (!mddev->events && super_init_validation(mddev, rdev)) + if (!mddev->events && super_init_validation(rs, rdev)) + return -EINVAL; + + if (le32_to_cpu(sb->compat_features) != FEATURE_FLAG_SUPPORTS_V190) { + rs->ti->error = "Unable to assemble array: Unknown flag(s) in compatible feature flags"; return -EINVAL; + } - if (le32_to_cpu(sb->features)) { - rs->ti->error = "Unable to assemble array: No feature flags supported yet"; + if (sb->incompat_features) { + rs->ti->error = "Unable to assemble array: No incompatible feature flags supported yet"; return -EINVAL; } /* Enable bitmap creation for RAID levels != 0 */ - mddev->bitmap_info.offset = (rs->raid_type->level) ? to_sector(4096) : 0; + mddev->bitmap_info.offset = rt_is_raid0(rs->raid_type) ? 0 : to_sector(4096); rdev->mddev->bitmap_info.default_offset = mddev->bitmap_info.offset; - if (!test_bit(FirstUse, &rdev->flags)) { + if (!test_and_clear_bit(FirstUse, &rdev->flags)) { + /* Retrieve device size stored in superblock to be prepared for shrink */ + rdev->sectors = le64_to_cpu(sb->sectors); rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset); - if (rdev->recovery_offset != MaxSector) - clear_bit(In_sync, &rdev->flags); + if (rdev->recovery_offset == MaxSector) + set_bit(In_sync, &rdev->flags); + /* + * If no reshape in progress -> we're recovering single + * disk(s) and have to set the device(s) to out-of-sync + */ + else if (!rs_is_reshaping(rs)) + clear_bit(In_sync, &rdev->flags); /* Mandatory for recovery */ } /* * If a device comes back, set it as not In_sync and no longer faulty. */ - if (test_bit(Faulty, &rdev->flags)) { - clear_bit(Faulty, &rdev->flags); + if (test_and_clear_bit(Faulty, &rdev->flags)) { + rdev->recovery_offset = 0; clear_bit(In_sync, &rdev->flags); rdev->saved_raid_disk = rdev->raid_disk; - rdev->recovery_offset = 0; } - clear_bit(FirstUse, &rdev->flags); + /* Reshape support -> restore repective data offsets */ + rdev->data_offset = le64_to_cpu(sb->data_offset); + rdev->new_data_offset = le64_to_cpu(sb->new_data_offset); return 0; } @@ -1072,7 +2302,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) */ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) { - int ret; + int r; struct raid_dev *dev; struct md_rdev *rdev, *tmp, *freshest; struct mddev *mddev = &rs->md; @@ -1082,24 +2312,22 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) /* * Skipping super_load due to CTR_FLAG_SYNC will cause * the array to undergo initialization again as - * though it were new. This is the intended effect + * though it were new. This is the intended effect * of the "sync" directive. * * When reshaping capability is added, we must ensure * that the "sync" directive is disallowed during the * reshape. 
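 *
 * E.g. (hypothetical devices) a bare 'sync' key among the table parameters
 * forces full resynchronisation of an existing set, precisely because
 * super_load is skipped here:
 *
 *   0 2097152 raid raid1 2 0 sync 2 /dev/m0 /dev/d0 /dev/m1 /dev/d1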
*/ - rdev->sectors = to_sector(i_size_read(rdev->bdev->bd_inode)); - - if (rs->ctr_flags & CTR_FLAG_SYNC) + if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) continue; if (!rdev->meta_bdev) continue; - ret = super_load(rdev, freshest); + r = super_load(rdev, freshest); - switch (ret) { + switch (r) { case 1: freshest = rdev; break; @@ -1116,57 +2344,368 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) if (rdev->sb_page) put_page(rdev->sb_page); - rdev->sb_page = NULL; + rdev->sb_page = NULL; + + rdev->sb_loaded = 0; + + /* + * We might be able to salvage the data device + * even though the meta device has failed. For + * now, we behave as though '- -' had been + * set for this device in the table. + */ + if (dev->data_dev) + dm_put_device(ti, dev->data_dev); + + dev->data_dev = NULL; + rdev->bdev = NULL; + + list_del(&rdev->same_set); + } + } + + if (!freshest) + return 0; + + if (validate_raid_redundancy(rs)) { + rs->ti->error = "Insufficient redundancy to activate array"; + return -EINVAL; + } + + /* + * Validation of the freshest device provides the source of + * validation for the remaining devices. + */ + rs->ti->error = "Unable to assemble array: Invalid superblocks"; + if (super_validate(rs, freshest)) + return -EINVAL; + + rdev_for_each(rdev, mddev) + if ((rdev != freshest) && super_validate(rs, rdev)) + return -EINVAL; + return 0; +} + +/* + * Adjust data_offset and new_data_offset on all disk members of @rs + * for out of place reshaping if requested by contructor + * + * We need free space at the beginning of each raid disk for forward + * and at the end for backward reshapes which userspace has to provide + * via remapping/reordering of space. + */ +static int rs_adjust_data_offsets(struct raid_set *rs) +{ + sector_t data_offset = 0, new_data_offset = 0; + struct md_rdev *rdev; + + /* Constructor did not request data offset change */ + if (!test_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) { + if (!rs_is_reshapable(rs)) + goto out; + + return 0; + } + + /* HM FIXME: get InSync raid_dev? */ + rdev = &rs->dev[0].rdev; + + if (rs->delta_disks < 0) { + /* + * Removing disks (reshaping backwards): + * + * - before reshape: data is at offset 0 and free space + * is at end of each component LV + * + * - after reshape: data is at offset rs->data_offset != 0 on each component LV + */ + data_offset = 0; + new_data_offset = rs->data_offset; + + } else if (rs->delta_disks > 0) { + /* + * Adding disks (reshaping forwards): + * + * - before reshape: data is at offset rs->data_offset != 0 and + * free space is at begin of each component LV + * + * - after reshape: data is at offset 0 on each component LV + */ + data_offset = rs->data_offset; + new_data_offset = 0; + + } else { + /* + * User space passes in 0 for data offset after having removed reshape space + * + * - or - (data offset != 0) + * + * Changing RAID layout or chunk size -> toggle offsets + * + * - before reshape: data is at offset rs->data_offset 0 and + * free space is at end of each component LV + * -or- + * data is at offset rs->data_offset != 0 and + * free space is at begin of each component LV + * + * - after reshape: data is at offset 0 if it was at offset != 0 + * or at offset != 0 if it was at offset 0 + * on each component LV + * + */ + data_offset = rs->data_offset ? rdev->data_offset : 0; + new_data_offset = data_offset ? 
0 : rs->data_offset; + set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); + } + + /* + * Make sure we got a minimum amount of free sectors per device + */ + if (rs->data_offset && + to_sector(i_size_read(rdev->bdev->bd_inode)) - rdev->sectors < MIN_FREE_RESHAPE_SPACE) { + rs->ti->error = data_offset ? "No space for forward reshape" : + "No space for backward reshape"; + return -ENOSPC; + } +out: + /* Adjust data offsets on all rdevs */ + rdev_for_each(rdev, &rs->md) { + rdev->data_offset = data_offset; + rdev->new_data_offset = new_data_offset; + } + + return 0; +} + +/* Userspace reordered disks -> adjust raid_disk indexes in @rs */ +static void __reorder_raid_disk_indexes(struct raid_set *rs) +{ + int i = 0; + struct md_rdev *rdev; + + rdev_for_each(rdev, &rs->md) { + rdev->raid_disk = i++; + rdev->saved_raid_disk = rdev->new_raid_disk = -1; + } +} + +/* + * Setup @rs for takeover by a different raid level + */ +static int rs_setup_takeover(struct raid_set *rs) +{ + struct mddev *mddev = &rs->md; + struct md_rdev *rdev; + unsigned int d = mddev->raid_disks = rs->raid_disks; + sector_t new_data_offset = rs->dev[0].rdev.data_offset ? 0 : rs->data_offset; + + if (rt_is_raid10(rs->raid_type)) { + if (mddev->level == 0) { + /* Userspace reordered disks -> adjust raid_disk indexes */ + __reorder_raid_disk_indexes(rs); + + /* raid0 -> raid10_far layout */ + mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_FAR, + rs->raid10_copies); + } else if (mddev->level == 1) + /* raid1 -> raid10_near layout */ + mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_NEAR, + rs->raid_disks); + else + return -EINVAL; + + } + + clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags); + mddev->recovery_cp = MaxSector; + + while (d--) { + rdev = &rs->dev[d].rdev; + + if (test_bit(d, (void *) rs->rebuild_disks)) { + clear_bit(In_sync, &rdev->flags); + clear_bit(Faulty, &rdev->flags); + mddev->recovery_cp = rdev->recovery_offset = 0; + /* Bitmap has to be created when we do an "up" takeover */ + set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); + } + + rdev->new_data_offset = new_data_offset; + } + + return 0; +} + +/* Prepare @rs for reshape */ +static int rs_prepare_reshape(struct raid_set *rs) +{ + bool reshape; + struct mddev *mddev = &rs->md; + + if (rs_is_raid10(rs)) { + if (rs->raid_disks != mddev->raid_disks && + __is_raid10_near(mddev->layout) && + rs->raid10_copies && + rs->raid10_copies != __raid10_near_copies(mddev->layout)) { + /* + * raid disks have to be a multiple of data copies to allow this conversion. + * + * This is actually not a reshape; it is a + * rebuild of any additional mirrors per group + */ + if (rs->raid_disks % rs->raid10_copies) { + rs->ti->error = "Can't reshape raid10 mirror groups"; + return -EINVAL; + } + + /* Userspace reordered disks to add/remove mirrors -> adjust raid_disk indexes */ + __reorder_raid_disk_indexes(rs); + mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_NEAR, + rs->raid10_copies); + mddev->new_layout = mddev->layout; + reshape = false; + } else + reshape = true; + + } else if (rs_is_raid456(rs)) + reshape = true; + + else if (rs_is_raid1(rs)) { + if (rs->delta_disks) { + /* Process raid1 via delta_disks */ + mddev->degraded = rs->delta_disks < 0 ? -rs->delta_disks : rs->delta_disks; + reshape = true; + } else { + /* Process raid1 without delta_disks */ + mddev->raid_disks = rs->raid_disks; + set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); + reshape = false; + } + } else { + rs->ti->error = "Called with bogus raid type"; + return -EINVAL; + } + + if (reshape) { + set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags); + set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); + set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); + } else if (mddev->raid_disks < rs->raid_disks) + /* Create new superblocks and bitmaps, if any new disks */ + set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); + + return 0; +} + +/* + * + * - change raid layout + * - change chunk size + * - add disks + * - remove disks + */ +static int rs_setup_reshape(struct raid_set *rs) +{ + int r = 0; + unsigned int cur_raid_devs, d; + struct mddev *mddev = &rs->md; + struct md_rdev *rdev; + + mddev->delta_disks = rs->delta_disks; + cur_raid_devs = mddev->raid_disks; + + /* Ignore impossible layout change whilst adding/removing disks */ + if (mddev->delta_disks && + mddev->layout != mddev->new_layout) { + DMINFO("Ignoring invalid layout change with delta_disks=%d", rs->delta_disks); + mddev->new_layout = mddev->layout; + } + + /* + * Adjust array size: + * + * - in case of adding disks, array size has + * to grow after the disk adding reshape, + * which'll happen in the event handler; + * reshape will happen forward, so space has to + * be available at the beginning of each disk + * + * - in case of removing disks, array size + * has to shrink before starting the reshape, + * which'll happen here; + * reshape will happen backward, so space has to + * be available at the end of each disk + * + * - data_offset and new_data_offset are + * adjusted for aforementioned out of place + * reshaping based on userspace passing in + * the "data_offset <sectors>" key/value + * pair via the constructor */ - rdev->sb_loaded = 0; + /* Add disk(s) */ + if (rs->delta_disks > 0) { + /* Prepare disks for check in raid4/5/6/10 {check|start}_reshape */ + for (d = cur_raid_devs; d < rs->raid_disks; d++) { + rdev = &rs->dev[d].rdev; + clear_bit(In_sync, &rdev->flags); /* - * We might be able to salvage the data device - * even though the meta device has failed. For - * now, we behave as though '- -' had been - * set for this device in the table. + * saved_raid_disk needs to be -1, or recovery_offset will be set to 0 + * by md, which'll store that erroneously in the superblock on reshape */ - if (dev->data_dev) - dm_put_device(ti, dev->data_dev); - - dev->data_dev = NULL; - rdev->bdev = NULL; + rdev->saved_raid_disk = -1; + rdev->raid_disk = d; - list_del(&rdev->same_set); + rdev->sectors = mddev->dev_sectors; + rdev->recovery_offset = rs_is_raid1(rs) ? 0 : MaxSector; } - } - if (!freshest) - return 0; - - if (validate_raid_redundancy(rs)) { - rs->ti->error = "Insufficient redundancy to activate array"; - return -EINVAL; - } + mddev->reshape_backwards = 0; /* adding disks -> forward reshape */ 
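The out-of-place reshape machinery here hinges on the data_offset/new_data_offset pair prepared by rs_adjust_data_offsets() above. A minimal stand-alone sketch of the toggle used for layout/chunk-size-only changes (userspace C with hypothetical numbers; toggle_offsets() is an invented name, whereas the kernel operates on struct md_rdev members):

	#include <stdio.h>

	typedef unsigned long long sector_t;

	/* Mirrors the 'toggle offsets' branch: data flips between offset 0
	 * and the requested offset so new stripes never overwrite old ones. */
	static void toggle_offsets(sector_t requested, sector_t cur_data_offset,
				   sector_t *data, sector_t *new_data)
	{
		*data = requested ? cur_data_offset : 0;
		*new_data = *data ? 0 : requested;
	}

	int main(void)
	{
		sector_t d, nd;

		/* data currently 8192 sectors in -> reshape writes to offset 0 */
		toggle_offsets(8192, 8192, &d, &nd);
		printf("data_offset=%llu new_data_offset=%llu\n", d, nd);
		return 0;
	}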
- /* - * Validation of the freshest device provides the source of - * validation for the remaining devices. - */ - ti->error = "Unable to assemble array: Invalid superblocks"; - if (super_validate(rs, freshest)) - return -EINVAL; + /* Remove disk(s) */ + } else if (rs->delta_disks < 0) { + r = rs_set_dev_and_array_sectors(rs, true); + mddev->reshape_backwards = 1; /* removing disk(s) -> backward reshape */ - rdev_for_each(rdev, mddev) - if ((rdev != freshest) && super_validate(rs, rdev)) - return -EINVAL; + /* Change layout and/or chunk size */ + } else { + /* + * Reshape layout (e.g. raid5_ls -> raid5_n) and/or chunk size: + * + * keeping number of disks and do layout change -> + * + * toggle reshape_backward depending on data_offset: + * + * - free space upfront -> reshape forward + * + * - free space at the end -> reshape backward + * + * + * This utilizes free reshape space avoiding the need + * for userspace to move (parts of) LV segments in + * case of layout/chunksize change (for disk + * adding/removing reshape space has to be at + * the proper address (see above with delta_disks): + * + * add disk(s) -> begin + * remove disk(s)-> end + */ + mddev->reshape_backwards = rs->dev[0].rdev.data_offset ? 0 : 1; + } - return 0; + return r; } /* * Enable/disable discard support on RAID set depending on * RAID level and discard properties of underlying RAID members. */ -static void configure_discard_support(struct dm_target *ti, struct raid_set *rs) +static void configure_discard_support(struct raid_set *rs) { int i; bool raid456; + struct dm_target *ti = rs->ti; /* Assume discards not supported until after checks below. */ ti->discards_supported = false; @@ -1174,7 +2713,7 @@ static void configure_discard_support(struct dm_target *ti, struct raid_set *rs) /* RAID level 4,5,6 require discard_zeroes_data for data integrity! */ raid456 = (rs->md.level == 4 || rs->md.level == 5 || rs->md.level == 6); - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < rs->raid_disks; i++) { struct request_queue *q; if (!rs->dev[i].rdev.bdev) @@ -1207,118 +2746,252 @@ static void configure_discard_support(struct dm_target *ti, struct raid_set *rs) } /* - * Construct a RAID4/5/6 mapping: + * Construct a RAID0/1/10/4/5/6 mapping: * Args: - * <raid_type> <#raid_params> <raid_params> \ - * <#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> } + * <raid_type> <#raid_params> <raid_params>{0,} \ + * <#raid_devs> [<meta_dev1> <dev1>]{1,} * - * <raid_params> varies by <raid_type>. See 'parse_raid_params' for + * <raid_params> varies by <raid_type>. See 'parse_raid_params' for * details on possible <raid_params>. + * + * Userspace is free to initialize the metadata devices, hence the superblocks to + * enforce recreation based on the passed-in table parameters. + * + */ -static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) +static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) { - int ret; + int r; + bool resize; struct raid_type *rt; - unsigned long num_raid_params, num_raid_devs; + unsigned int num_raid_params, num_raid_devs; + sector_t calculated_dev_sectors; struct raid_set *rs = NULL; - - /* Must have at least <raid_type> <#raid_params> */ - if (argc < 2) { - ti->error = "Too few arguments"; + const char *arg; + struct rs_layout rs_layout; + struct dm_arg_set as = { argc, argv }, as_nrd; + struct dm_arg _args[] = { + { 0, as.argc, "Cannot understand number of raid parameters" }, + { 1, 254, "Cannot understand number of raid devices parameters" } + }; + + /* Must have <raid_type> */ + arg = dm_shift_arg(&as); + if (!arg) { + ti->error = "No arguments"; return -EINVAL; } - /* raid type */ - rt = get_raid_type(argv[0]); + rt = get_raid_type(arg); if (!rt) { ti->error = "Unrecognised raid_type"; return -EINVAL; } - argc--; - argv++; - /* number of RAID parameters */ - if (kstrtoul(argv[0], 10, &num_raid_params) < 0) { - ti->error = "Cannot understand number of RAID parameters"; + /* Must have <#raid_params> */ + if (dm_read_arg_group(_args, &as, &num_raid_params, &ti->error)) return -EINVAL; - } - argc--; - argv++; - - /* Skip over RAID params for now and find out # of devices */ - if (num_raid_params >= argc) { - ti->error = "Arguments do not agree with counts given"; - return -EINVAL; - } - if ((kstrtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) || - (num_raid_devs > MAX_RAID_DEVICES)) { - ti->error = "Cannot understand number of raid devices"; + /* number of raid device tuples */ + as_nrd = as; + dm_consume_args(&as_nrd, num_raid_params); + _args[1].max = (as_nrd.argc - 1) / 2; + if (dm_read_arg(_args + 1, &as_nrd, &num_raid_devs, &ti->error)) return -EINVAL; - } - argc -= num_raid_params + 1; /* +1: we already have num_raid_devs */ - if (argc != (num_raid_devs * 2)) { - ti->error = "Supplied RAID devices does not match the count given"; + if (!__within_range(num_raid_devs, 1, MAX_RAID_DEVICES)) { + ti->error = "Invalid number of supplied raid devices"; return -EINVAL; } - rs = context_alloc(ti, rt, (unsigned)num_raid_devs); + rs = raid_set_alloc(ti, rt, num_raid_devs); if (IS_ERR(rs)) return PTR_ERR(rs); - ret = parse_raid_params(rs, argv, (unsigned)num_raid_params); - if (ret) + r = parse_raid_params(rs, &as, num_raid_params); + if (r) goto bad; - argv += num_raid_params + 1; - - ret = dev_parms(rs, argv); - if (ret) + r = parse_dev_params(rs, &as); + if (r) goto bad; rs->md.sync_super = super_sync; - ret = analyse_superblocks(ti, rs); - if (ret) + + /* + * Calculate ctr requested array and device sizes to allow + * for superblock analysis needing device sizes defined. + * + * Any existing superblock will overwrite the array and device sizes + */ + r = rs_set_dev_and_array_sectors(rs, false); + if (r) + goto bad; + + calculated_dev_sectors = rs->dev[0].rdev.sectors; + + /* + * Backup any new raid set level, layout, ... + * requested to be able to compare to superblock + * members for conversion decisions. + */ + rs_config_backup(rs, &rs_layout); + + r = analyse_superblocks(ti, rs); + if (r) goto bad; + resize = calculated_dev_sectors != rs->dev[0].rdev.sectors; + INIT_WORK(&rs->md.event_work, do_table_event); ti->private = rs; ti->num_flush_bios = 1; + /* Restore any requested new layout for conversion decision */ + rs_config_restore(rs, &rs_layout); + /* - * Disable/enable discard support on RAID set. + * Now that we have any superblock metadata available, + * check for new, recovering, reshaping, to be taken over, + * to be reshaped or an existing, unchanged raid set to + * run in sequence. */ - configure_discard_support(ti, rs); + if (test_bit(MD_ARRAY_FIRST_USE, &rs->md.flags)) { + /* A new raid6 set has to be recovered to ensure proper parity and Q-Syndrome */ + if (rs_is_raid6(rs) && + test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) { + ti->error = "'nosync' not allowed for new raid6 set"; + r = -EINVAL; + goto bad; + } + rs_setup_recovery(rs, 0); + set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); + rs_set_new(rs); + } else if (rs_is_recovering(rs)) { + /* A recovering raid set may be resized */ + ; /* skip setup rs */ + } else if (rs_is_reshaping(rs)) { + /* Have to reject size change request during reshape */ + if (resize) { + ti->error = "Can't resize a reshaping raid set"; + r = -EPERM; + goto bad; + } + /* skip setup rs */ + } else if (rs_takeover_requested(rs)) { + if (rs_is_reshaping(rs)) { + ti->error = "Can't takeover a reshaping raid set"; + r = -EPERM; + goto bad; + } + + /* + * If a takeover is needed, userspace sets any additional + * devices to rebuild and we can check for a valid request here. + * + * If acceptable, set the level to the new requested + * one, prohibit requesting recovery, allow the raid + * set to run and store superblocks during resume. + */ + r = rs_check_takeover(rs); + if (r) + goto bad; + + r = rs_setup_takeover(rs); + if (r) + goto bad; + + set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); + set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); + /* Takeover ain't recovery, so disable recovery */ + rs_setup_recovery(rs, MaxSector); + rs_set_new(rs); + } else if (rs_reshape_requested(rs)) { + /* + * We can only prepare for a reshape here, because the + * raid set needs to run to provide the respective reshape + * check functions via its MD personality instance. + * + * So do the reshape check after md_run() succeeded. + */ + r = rs_prepare_reshape(rs); + if (r) + return r; + + /* Reshaping ain't recovery, so disable recovery */ + rs_setup_recovery(rs, MaxSector); + rs_set_cur(rs); + } else { + /* May not set recovery when a device rebuild is requested */ + if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) { + rs_setup_recovery(rs, MaxSector); + set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); + } else + rs_setup_recovery(rs, test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) ? + 0 : (resize ? 
calculated_dev_sectors : MaxSector)); + rs_set_cur(rs); + } + + /* If constructor requested it, change data and new_data offsets */ + r = rs_adjust_data_offsets(rs); + if (r) + goto bad; + + /* Start raid set read-only and assumed clean to change in raid_resume() */ + rs->md.ro = 1; + rs->md.in_sync = 1; + set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery); /* Has to be held on running the array */ mddev_lock_nointr(&rs->md); - ret = md_run(&rs->md); + r = md_run(&rs->md); rs->md.in_sync = 0; /* Assume already marked dirty */ - mddev_unlock(&rs->md); - if (ret) { - ti->error = "Fail to run raid array"; + if (r) { + ti->error = "Failed to run raid array"; + mddev_unlock(&rs->md); goto bad; } - if (ti->len != rs->md.array_sectors) { - ti->error = "Array size does not match requested target length"; - ret = -EINVAL; - goto size_mismatch; - } rs->callbacks.congested_fn = raid_is_congested; dm_table_add_target_callbacks(ti->table, &rs->callbacks); mddev_suspend(&rs->md); + + /* Try to adjust the raid4/5/6 stripe cache size to the stripe size */ + if (rs_is_raid456(rs)) { + r = rs_set_raid456_stripe_cache(rs); + if (r) + goto bad_stripe_cache; + } + + /* Now do an early reshape check */ + if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) { + r = rs_check_reshape(rs); + if (r) + goto bad_check_reshape; + + /* Restore new, ctr requested layout to perform check */ + rs_config_restore(rs, &rs_layout); + + if (rs->md.pers->start_reshape) { + r = rs->md.pers->check_reshape(&rs->md); + if (r) { + ti->error = "Reshape check failed"; + goto bad_check_reshape; + } + } + } + + mddev_unlock(&rs->md); return 0; -size_mismatch: +bad_stripe_cache: +bad_check_reshape: md_stop(&rs->md); bad: - context_free(rs); + raid_set_free(rs); - return ret; + return r; } static void raid_dtr(struct dm_target *ti) @@ -1327,7 +3000,7 @@ static void raid_dtr(struct dm_target *ti) list_del_init(&rs->callbacks.list); md_stop(&rs->md); - context_free(rs); + raid_set_free(rs); } static int raid_map(struct dm_target *ti, struct bio *bio) @@ -1335,11 +3008,23 @@ static int raid_map(struct dm_target *ti, struct bio *bio) struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; + /* + * If we're reshaping to add disk(s)), ti->len and + * mddev->array_sectors will differ during the process + * (ti->len > mddev->array_sectors), so we have to requeue + * bios with addresses > mddev->array_sectors here or + * there will occur accesses past EOD of the component + * data images thus erroring the raid set. 
+ */ + if (unlikely(bio_end_sector(bio) > mddev->array_sectors)) + return DM_MAPIO_REQUEUE; + mddev->pers->make_request(mddev, bio); return DM_MAPIO_SUBMITTED; } +/* Return string describing the current sync action of @mddev */ static const char *decipher_sync_action(struct mddev *mddev) { if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) @@ -1365,195 +3050,260 @@ static const char *decipher_sync_action(struct mddev *mddev) return "idle"; } -static void raid_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +/* + * Return status string @rdev + * + * Status characters: + * + * 'D' = Dead/Failed device + * 'a' = Alive but not in-sync + * 'A' = Alive and in-sync + */ +static const char *__raid_dev_status(struct md_rdev *rdev, bool array_in_sync) { - struct raid_set *rs = ti->private; - unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ - unsigned sz = 0; - int i, array_in_sync = 0; - sector_t sync; + if (test_bit(Faulty, &rdev->flags)) + return "D"; + else if (!array_in_sync || !test_bit(In_sync, &rdev->flags)) + return "a"; + else + return "A"; +} - switch (type) { - case STATUSTYPE_INFO: - DMEMIT("%s %d ", rs->raid_type->name, rs->md.raid_disks); +/* Helper to return resync/reshape progress for @rs and @array_in_sync */ +static sector_t rs_get_progress(struct raid_set *rs, + sector_t resync_max_sectors, bool *array_in_sync) +{ + sector_t r, recovery_cp, curr_resync_completed; + struct mddev *mddev = &rs->md; - if (rs->raid_type->level) { - if (test_bit(MD_RECOVERY_RUNNING, &rs->md.recovery)) - sync = rs->md.curr_resync_completed; - else - sync = rs->md.recovery_cp; - - if (sync >= rs->md.resync_max_sectors) { - /* - * Sync complete. - */ - array_in_sync = 1; - sync = rs->md.resync_max_sectors; - } else if (test_bit(MD_RECOVERY_REQUESTED, &rs->md.recovery)) { - /* - * If "check" or "repair" is occurring, the array has - * undergone and initial sync and the health characters - * should not be 'a' anymore. - */ - array_in_sync = 1; + curr_resync_completed = mddev->curr_resync_completed ?: mddev->recovery_cp; + recovery_cp = mddev->recovery_cp; + *array_in_sync = false; + + if (rs_is_raid0(rs)) { + r = resync_max_sectors; + *array_in_sync = true; + + } else { + r = mddev->reshape_position; + + /* Reshape is relative to the array size */ + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) || + r != MaxSector) { + if (r == MaxSector) { + *array_in_sync = true; + r = resync_max_sectors; } else { - /* - * The array may be doing an initial sync, or it may - * be rebuilding individual components. If all the - * devices are In_sync, then it is the array that is - * being initialized. - */ - for (i = 0; i < rs->md.raid_disks; i++) - if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) - array_in_sync = 1; + /* Got to reverse on backward reshape */ + if (mddev->reshape_backwards) + r = mddev->array_sectors - r; + + /* Devide by # of data stripes */ + sector_div(r, mddev_data_stripes(rs)); } + + /* Sync is relative to the component device size */ + } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + r = curr_resync_completed; + else + r = recovery_cp; + + if (r == MaxSector) { + /* + * Sync complete. + */ + *array_in_sync = true; + r = resync_max_sectors; + } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { + /* + * If "check" or "repair" is occurring, the raid set has + * undergone an initial sync and the health characters + * should not be 'a' anymore. 
+ */ + *array_in_sync = true; } else { - /* RAID0 */ - array_in_sync = 1; - sync = rs->md.resync_max_sectors; - } + struct md_rdev *rdev; - /* - * Status characters: - * 'D' = Dead/Failed device - * 'a' = Alive but not in-sync - * 'A' = Alive and in-sync - */ - for (i = 0; i < rs->md.raid_disks; i++) { - if (test_bit(Faulty, &rs->dev[i].rdev.flags)) - DMEMIT("D"); - else if (!array_in_sync || - !test_bit(In_sync, &rs->dev[i].rdev.flags)) - DMEMIT("a"); - else - DMEMIT("A"); + /* + * The raid set may be doing an initial sync, or it may + * be rebuilding individual components. If all the + * devices are In_sync, then it is the raid set that is + * being initialized. + */ + rdev_for_each(rdev, mddev) + if (!test_bit(In_sync, &rdev->flags)) + *array_in_sync = true; +#if 0 + r = 0; /* HM FIXME: TESTME: https://bugzilla.redhat.com/show_bug.cgi?id=1210637 ? */ +#endif } + } + + return r; +} + +/* Helper to return @dev name or "-" if !@dev */ +static const char *__get_dev_name(struct dm_dev *dev) +{ + return dev ? dev->name : "-"; +} + +static void raid_status(struct dm_target *ti, status_type_t type, + unsigned int status_flags, char *result, unsigned int maxlen) +{ + struct raid_set *rs = ti->private; + struct mddev *mddev = &rs->md; + struct r5conf *conf = mddev->private; + int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0; + bool array_in_sync; + unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */ + unsigned int sz = 0; + unsigned int rebuild_disks; + unsigned int write_mostly_params = 0; + sector_t progress, resync_max_sectors, resync_mismatches; + const char *sync_action; + struct raid_type *rt; + struct md_rdev *rdev; + + switch (type) { + case STATUSTYPE_INFO: + /* *Should* always succeed */ + rt = get_raid_type_by_ll(mddev->new_level, mddev->new_layout); + if (!rt) + return; + + DMEMIT("%s %d ", rt->name, mddev->raid_disks); + + /* Access most recent mddev properties for status output */ + smp_rmb(); + /* Get sensible max sectors even if raid set not yet started */ + resync_max_sectors = test_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags) ? + mddev->resync_max_sectors : mddev->dev_sectors; + progress = rs_get_progress(rs, resync_max_sectors, &array_in_sync); + resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ? + atomic64_read(&mddev->resync_mismatches) : 0; + sync_action = decipher_sync_action(&rs->md); + + /* HM FIXME: do we want another state char for raid0? It shows 'D' or 'A' now */ + rdev_for_each(rdev, mddev) + DMEMIT(__raid_dev_status(rdev, array_in_sync)); /* - * In-sync ratio: + * In-sync/Reshape ratio: * The in-sync ratio shows the progress of: - * - Initializing the array - * - Rebuilding a subset of devices of the array + * - Initializing the raid set + * - Rebuilding a subset of devices of the raid set * The user can distinguish between the two by referring * to the status characters. + * + * The reshape ratio shows the progress of + * changing the raid layout or the number of + * disks of a raid set */ - DMEMIT(" %llu/%llu", - (unsigned long long) sync, - (unsigned long long) rs->md.resync_max_sectors); + DMEMIT(" %llu/%llu", (unsigned long long) progress, + (unsigned long long) resync_max_sectors); /* + * v1.5.0+: + * * Sync action: - * See Documentation/device-mapper/dm-raid.c for + * See Documentation/device-mapper/dm-raid.txt for * information on each of these states. 
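+ * (Per decipher_sync_action() above, the possible strings are: frozen,
+ * reshape, resync, check, repair, recover and idle.)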
*/ - DMEMIT(" %s", decipher_sync_action(&rs->md)); + DMEMIT(" %s", sync_action); /* + * v1.5.0+: + * * resync_mismatches/mismatch_cnt * This field shows the number of discrepancies found when - * performing a "check" of the array. + * performing a "check" of the raid set. */ - DMEMIT(" %llu", - (strcmp(rs->md.last_sync_action, "check")) ? 0 : - (unsigned long long) - atomic64_read(&rs->md.resync_mismatches)); - break; - case STATUSTYPE_TABLE: - /* The string you would use to construct this array */ - for (i = 0; i < rs->md.raid_disks; i++) { - if ((rs->ctr_flags & CTR_FLAG_REBUILD) && - rs->dev[i].data_dev && - !test_bit(In_sync, &rs->dev[i].rdev.flags)) - raid_param_cnt += 2; /* for rebuilds */ - if (rs->dev[i].data_dev && - test_bit(WriteMostly, &rs->dev[i].rdev.flags)) - raid_param_cnt += 2; - } - - raid_param_cnt += (hweight32(rs->ctr_flags & ~CTR_FLAG_REBUILD) * 2); - if (rs->ctr_flags & (CTR_FLAG_SYNC | CTR_FLAG_NOSYNC)) - raid_param_cnt--; - - DMEMIT("%s %u %u", rs->raid_type->name, - raid_param_cnt, rs->md.chunk_sectors); - - if ((rs->ctr_flags & CTR_FLAG_SYNC) && - (rs->md.recovery_cp == MaxSector)) - DMEMIT(" sync"); - if (rs->ctr_flags & CTR_FLAG_NOSYNC) - DMEMIT(" nosync"); - - for (i = 0; i < rs->md.raid_disks; i++) - if ((rs->ctr_flags & CTR_FLAG_REBUILD) && - rs->dev[i].data_dev && - !test_bit(In_sync, &rs->dev[i].rdev.flags)) - DMEMIT(" rebuild %u", i); - - if (rs->ctr_flags & CTR_FLAG_DAEMON_SLEEP) - DMEMIT(" daemon_sleep %lu", - rs->md.bitmap_info.daemon_sleep); - - if (rs->ctr_flags & CTR_FLAG_MIN_RECOVERY_RATE) - DMEMIT(" min_recovery_rate %d", rs->md.sync_speed_min); + DMEMIT(" %llu", (unsigned long long) resync_mismatches); - if (rs->ctr_flags & CTR_FLAG_MAX_RECOVERY_RATE) - DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max); - - for (i = 0; i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev && - test_bit(WriteMostly, &rs->dev[i].rdev.flags)) - DMEMIT(" write_mostly %u", i); - - if (rs->ctr_flags & CTR_FLAG_MAX_WRITE_BEHIND) - DMEMIT(" max_write_behind %lu", - rs->md.bitmap_info.max_write_behind); - - if (rs->ctr_flags & CTR_FLAG_STRIPE_CACHE) { - struct r5conf *conf = rs->md.private; - - /* convert from kiB to sectors */ - DMEMIT(" stripe_cache %d", - conf ? conf->max_nr_stripes * 2 : 0); - } - - if (rs->ctr_flags & CTR_FLAG_REGION_SIZE) - DMEMIT(" region_size %lu", - rs->md.bitmap_info.chunksize >> 9); - - if (rs->ctr_flags & CTR_FLAG_RAID10_COPIES) - DMEMIT(" raid10_copies %u", - raid10_md_layout_to_copies(rs->md.layout)); - - if (rs->ctr_flags & CTR_FLAG_RAID10_FORMAT) - DMEMIT(" raid10_format %s", - raid10_md_layout_to_format(rs->md.layout)); - - DMEMIT(" %d", rs->md.raid_disks); - for (i = 0; i < rs->md.raid_disks; i++) { - if (rs->dev[i].meta_dev) - DMEMIT(" %s", rs->dev[i].meta_dev->name); - else - DMEMIT(" -"); + /* + * v1.9.0+: + * + * data_offset (needed for out of space reshaping) + * This field shows the data offset into the data + * image LV where the first stripes data starts. + * + * We keep data_offset equal on all raid disks of the set, + * so retrieving it from the first raid disk is sufficient. 
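With those helpers in place, the STATUSTYPE_INFO case emits a fixed field order: raid type, device count, health characters, in-sync/reshape ratio, sync action, mismatch count and (new in v1.9.0) the data offset. A minimal sketch of that line layout; all values below are made up for illustration.

#include <stdio.h>

int main(void)
{
	char line[128];

	/* <type> <#devs> <health> <progress>/<total> <action> <mismatches> <data_offset> */
	snprintf(line, sizeof(line), "%s %d %s %llu/%llu %s %llu %llu",
		 "raid6_zr", 5, "AAAAa", 4096ULL, 8192ULL, "resync",
		 0ULL, 2048ULL);
	puts(line);
	return 0;
}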
+ */ + DMEMIT(" %llu", (unsigned long long) rs->dev[0].rdev.data_offset); + break; - if (rs->dev[i].data_dev) - DMEMIT(" %s", rs->dev[i].data_dev->name); - else - DMEMIT(" -"); - } + case STATUSTYPE_TABLE: + /* Report the table line string you would use to construct this raid set */ + + /* Calculate raid parameter count */ + for (i = 0; i < rs->raid_disks; i++) + if (test_bit(WriteMostly, &rs->dev[i].rdev.flags)) + write_mostly_params += 2; + rebuild_disks = memweight(rs->rebuild_disks, DISKS_ARRAY_ELEMS * sizeof(*rs->rebuild_disks)); + raid_param_cnt += rebuild_disks * 2 + + write_mostly_params + + hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) + + hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2; + /* Emit table line */ + DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors); + if (test_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags)) + DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT), + raid10_md_layout_to_format(mddev->layout)); + if (test_bit(__CTR_FLAG_RAID10_COPIES, &rs->ctr_flags)) + DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_COPIES), + raid10_md_layout_to_copies(mddev->layout)); + if (test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) + DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC)); + if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) + DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_SYNC)); + if (test_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags)) + DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE), + (unsigned long long) to_sector(mddev->bitmap_info.chunksize)); + if (test_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) + DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET), + (unsigned long long) rs->data_offset); + if (test_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) + DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP), + mddev->bitmap_info.daemon_sleep); + if (test_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags)) + DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_DELTA_DISKS), + max(rs->delta_disks, mddev->delta_disks)); + if (test_bit(__CTR_FLAG_STRIPE_CACHE, &rs->ctr_flags)) + DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_STRIPE_CACHE), + max_nr_stripes); + if (rebuild_disks) + for (i = 0; i < rs->raid_disks; i++) + if (test_bit(rs->dev[i].rdev.raid_disk, (void *) rs->rebuild_disks)) + DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD), + rs->dev[i].rdev.raid_disk); + if (write_mostly_params) + for (i = 0; i < rs->raid_disks; i++) + if (test_bit(WriteMostly, &rs->dev[i].rdev.flags)) + DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_WRITE_MOSTLY), + rs->dev[i].rdev.raid_disk); + if (test_bit(__CTR_FLAG_MAX_WRITE_BEHIND, &rs->ctr_flags)) + DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_WRITE_BEHIND), + mddev->bitmap_info.max_write_behind); + if (test_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) + DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE), + mddev->sync_speed_max); + if (test_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) + DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE), + mddev->sync_speed_min); + DMEMIT(" %d", rs->raid_disks); + for (i = 0; i < rs->raid_disks; i++) + DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev), + __get_dev_name(rs->dev[i].data_dev)); } } -static int raid_message(struct dm_target *ti, unsigned argc, char **argv) +static int raid_message(struct dm_target *ti, unsigned int argc, char **argv) { struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; 
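The STATUSTYPE_TABLE case first sizes the parameter list before emitting it: one slot for the chunk size, two per rebuild and per write_mostly device, one per no-argument option flag and two per one-argument flag. A runnable model of that arithmetic; the flag masks are invented for the example and do not match the kernel's CTR_FLAG_* values.

#include <stdio.h>

#define OPTS_NO_ARGS 0x0003u	/* hypothetical mask: sync, nosync       */
#define OPTS_ONE_ARG 0x00fcu	/* hypothetical mask: region_size, ...   */

static unsigned param_count(unsigned ctr_flags, unsigned rebuild_disks,
			    unsigned write_mostly_disks)
{
	unsigned cnt = 1;	/* chunk size is always present */

	cnt += rebuild_disks * 2;	/* "rebuild <slot>" pairs      */
	cnt += write_mostly_disks * 2;	/* "write_mostly <slot>" pairs */
	cnt += __builtin_popcount(ctr_flags & OPTS_NO_ARGS);
	cnt += __builtin_popcount(ctr_flags & OPTS_ONE_ARG) * 2;
	return cnt;
}

int main(void)
{
	/* one no-arg flag, one one-arg flag, 1 rebuild, 2 write_mostly */
	printf("%u\n", param_count(0x0001u | 0x0004u, 1, 2));
	return 0;
}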
- if (!strcasecmp(argv[0], "reshape")) { - DMERR("Reshape not supported."); - return -EINVAL; - } - if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; @@ -1571,11 +3321,10 @@ static int raid_message(struct dm_target *ti, unsigned argc, char **argv) test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return -EBUSY; else if (!strcasecmp(argv[0], "resync")) - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - else if (!strcasecmp(argv[0], "recover")) { + ; /* MD_RECOVERY_NEEDED set below */ + else if (!strcasecmp(argv[0], "recover")) set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - } else { + else { if (!strcasecmp(argv[0], "check")) set_bit(MD_RECOVERY_CHECK, &mddev->recovery); else if (!!strcasecmp(argv[0], "repair")) @@ -1588,11 +3337,11 @@ static int raid_message(struct dm_target *ti, unsigned argc, char **argv) * canceling read-auto mode */ mddev->ro = 0; - if (!mddev->suspended) + if (!mddev->suspended && mddev->sync_thread) md_wakeup_thread(mddev->sync_thread); } set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - if (!mddev->suspended) + if (!mddev->suspended && mddev->thread) md_wakeup_thread(mddev->thread); return 0; @@ -1602,28 +3351,27 @@ static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { struct raid_set *rs = ti->private; - unsigned i; - int ret = 0; + unsigned int i; + int r = 0; - for (i = 0; !ret && i < rs->md.raid_disks; i++) + for (i = 0; !r && i < rs->md.raid_disks; i++) if (rs->dev[i].data_dev) - ret = fn(ti, + r = fn(ti, rs->dev[i].data_dev, 0, /* No offset on data devs */ rs->md.dev_sectors, data); - return ret; + return r; } static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct raid_set *rs = ti->private; - unsigned chunk_size = rs->md.chunk_sectors << 9; - struct r5conf *conf = rs->md.private; + unsigned int chunk_size = to_bytes(rs->md.chunk_sectors); blk_limits_io_min(limits, chunk_size); - blk_limits_io_opt(limits, chunk_size * (conf->raid_disks - conf->max_degraded)); + blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs)); } static void raid_presuspend(struct dm_target *ti) @@ -1637,7 +3385,11 @@ static void raid_postsuspend(struct dm_target *ti) { struct raid_set *rs = ti->private; - mddev_suspend(&rs->md); + if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) { + if (!rs->md.suspended) + mddev_suspend(&rs->md); + rs->md.ro = 1; + } } static void attempt_restore_of_faulty_devices(struct raid_set *rs) @@ -1651,7 +3403,8 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) for (i = 0; i < rs->md.raid_disks; i++) { r = &rs->dev[i].rdev; if (test_bit(Faulty, &r->flags) && r->sb_page && - sync_page_io(r, 0, r->sb_size, r->sb_page, READ, 1)) { + sync_page_io(r, 0, r->sb_size, r->sb_page, + REQ_OP_READ, 0, true)) { DMINFO("Faulty %s device #%d has readable super block." " Attempting to revive it.", rs->raid_type->name, i); @@ -1660,7 +3413,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) * Faulty bit may be set, but sometimes the array can * be suspended before the personalities can respond * by removing the device from the array (i.e. calling - * 'hot_remove_disk'). If they haven't yet removed + * 'hot_remove_disk'). If they haven't yet removed * the failed device, its 'raid_disk' number will be * '>= 0' - meaning we must call this function * ourselves. 
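raid_message() above maps the message strings onto md recovery bits, and MD_RECOVERY_NEEDED is now set once at the end rather than per branch. A user-space approximation of the parsing follows; the bit values are arbitrary and the idle/frozen sync-thread interruption is elided.

#include <stdio.h>
#include <strings.h>

enum {
	FROZEN = 1, RECOVER = 2, CHECK = 4, REQUESTED = 8,
	SYNC = 16, NEEDED = 32,
};

static int parse_action(const char *arg, unsigned *bits)
{
	if (!strcasecmp(arg, "frozen")) {
		*bits |= FROZEN;
		return 0;
	}
	*bits &= ~FROZEN;
	if (!strcasecmp(arg, "resync"))
		;				/* NEEDED alone restarts resync */
	else if (!strcasecmp(arg, "recover"))
		*bits |= RECOVER;
	else if (!strcasecmp(arg, "check"))
		*bits |= CHECK | REQUESTED | SYNC;
	else if (!strcasecmp(arg, "repair"))
		*bits |= REQUESTED | SYNC;
	else
		return -1;			/* unknown message */
	*bits |= NEEDED;
	return 0;
}

int main(void)
{
	unsigned bits = 0;

	parse_action("check", &bits);
	printf("%#x\n", bits);	/* CHECK|REQUESTED|SYNC|NEEDED */
	return 0;
}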
@@ -1696,34 +3449,192 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) } } -static void raid_resume(struct dm_target *ti) +static int __load_dirty_region_bitmap(struct raid_set *rs) { - struct raid_set *rs = ti->private; + int r = 0; + + /* Try loading the bitmap unless "raid0", which does not have one */ + if (!rs_is_raid0(rs) && + !test_and_set_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags)) { + r = bitmap_load(&rs->md); + if (r) + DMERR("Failed to load bitmap"); + } - if (rs->raid_type->level) { - set_bit(MD_CHANGE_DEVS, &rs->md.flags); + return r; +} - if (!rs->bitmap_loaded) { - bitmap_load(&rs->md); - rs->bitmap_loaded = 1; - } else { - /* - * A secondary resume while the device is active. - * Take this opportunity to check whether any failed - * devices are reachable again. - */ - attempt_restore_of_faulty_devices(rs); +/* Enforce updating all superblocks */ +static void rs_update_sbs(struct raid_set *rs) +{ + struct mddev *mddev = &rs->md; + int ro = mddev->ro; + + set_bit(MD_CHANGE_DEVS, &mddev->flags); + mddev->ro = 0; + md_update_sb(mddev, 1); + mddev->ro = ro; +} + +/* + * Reshape changes raid algorithm of @rs to new one within personality + * (e.g. raid6_zr -> raid6_nc), changes stripe size, adds/removes + * disks from a raid set thus growing/shrinking it or resizes the set + * + * Call mddev_lock_nointr() before! + */ +static int rs_start_reshape(struct raid_set *rs) +{ + int r; + struct mddev *mddev = &rs->md; + struct md_personality *pers = mddev->pers; + + r = rs_setup_reshape(rs); + if (r) + return r; + + /* Need to be resumed to be able to start reshape, recovery is frozen until raid_resume() though */ + if (mddev->suspended) + mddev_resume(mddev); + + /* + * Check any reshape constraints enforced by the personalility + * + * May as well already kick the reshape off so that * pers->start_reshape() becomes optional. + */ + r = pers->check_reshape(mddev); + if (r) { + rs->ti->error = "pers->check_reshape() failed"; + return r; + } + + /* + * Personality may not provide start reshape method in which + * case check_reshape above has already covered everything + */ + if (pers->start_reshape) { + r = pers->start_reshape(mddev); + if (r) { + rs->ti->error = "pers->start_reshape() failed"; + return r; } + } + + /* Suspend because a resume will happen in raid_resume() */ + if (!mddev->suspended) + mddev_suspend(mddev); + + /* + * Now reshape got set up, update superblocks to + * reflect the fact so that a table reload will + * access proper superblock content in the ctr. + */ + rs_update_sbs(rs); + + return 0; +} + +static int raid_preresume(struct dm_target *ti) +{ + int r; + struct raid_set *rs = ti->private; + struct mddev *mddev = &rs->md; + + /* This is a resume after a suspend of the set -> it's already started */ + if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags)) + return 0; + + /* + * The superblocks need to be updated on disk if the + * array is new or new devices got added (thus zeroed + * out by userspace) or __load_dirty_region_bitmap + * will overwrite them in core with old data or fail. 
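rs_start_reshape() (added further down in this hunk) enforces a strict ordering: set up the reshape geometry, resume the frozen array so the personality can act, validate via check_reshape(), optionally call start_reshape(), re-suspend, and finally rewrite the superblocks so a table reload sees the new state. The optionality of the second hook is the subtle part; a small model, with invented names:

#include <stdio.h>

struct personality {
	int (*check_reshape)(void);	/* mandatory */
	int (*start_reshape)(void);	/* optional: may be NULL */
};

static int start_reshape(const struct personality *pers)
{
	int r = pers->check_reshape();

	if (r)
		return r;
	if (pers->start_reshape) {	/* some personalities do it all in check */
		r = pers->start_reshape();
		if (r)
			return r;
	}
	/* the caller then re-suspends and updates superblocks */
	return 0;
}

static int check_ok(void) { return 0; }

int main(void)
{
	struct personality p = { .check_reshape = check_ok };

	printf("%d\n", start_reshape(&p));
	return 0;
}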
+ */ + if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags)) + rs_update_sbs(rs); + + /* + * Disable/enable discard support on raid set after any + * conversion, because devices can have been added + */ + configure_discard_support(rs); + + /* Load the bitmap from disk unless raid0 */ + r = __load_dirty_region_bitmap(rs); + if (r) + return r; + + /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */ + if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && + mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) { + r = bitmap_resize(mddev->bitmap, mddev->dev_sectors, + to_bytes(rs->requested_bitmap_chunk_sectors), 0); + if (r) + DMERR("Failed to resize bitmap"); + } + + /* Check for any resize/reshape on @rs and adjust/initiate */ + /* Be prepared for mddev_resume() in raid_resume() */ + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) { + set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); + set_bit(MD_RECOVERY_SYNC, &mddev->recovery); + mddev->resync_min = mddev->recovery_cp; + } - clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery); + rs_set_capacity(rs); + + /* Check for any reshape request unless new raid set */ + if (test_and_clear_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) { + /* Initiate a reshape. */ + mddev_lock_nointr(mddev); + r = rs_start_reshape(rs); + mddev_unlock(mddev); + if (r) + DMWARN("Failed to check/start reshape, continuing without change"); + r = 0; } - mddev_resume(&rs->md); + return r; +} + +static void raid_resume(struct dm_target *ti) +{ + struct raid_set *rs = ti->private; + struct mddev *mddev = &rs->md; + + if (test_and_set_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) { + /* + * A secondary resume while the device is active. + * Take this opportunity to check whether any failed + * devices are reachable again. + */ + attempt_restore_of_faulty_devices(rs); + } else { + mddev->ro = 0; + mddev->in_sync = 0; + + /* + * When passing in flags to the ctr, we expect userspace + * to reset them because they made it to the superblocks + * and reload the mapping anyway. + * + * -> only unfreeze recovery in case of a table reload or + * we'll have a bogus recovery/reshape position + * retrieved from the superblock by the ctr because + * the ongoing recovery/reshape will change it after read. 
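Both raid_preresume() and raid_resume() now gate their one-time work on test_and_set_bit() runtime flags, so repeated suspend/resume cycles perform the expensive setup exactly once. The idiom in isolation, as a single-threaded model (the kernel version is atomic):

#include <stdbool.h>
#include <stdio.h>

static bool test_and_set(bool *flag)
{
	bool old = *flag;

	*flag = true;
	return old;
}

static void preresume(bool *preresumed)
{
	if (test_and_set(preresumed))
		return;		/* secondary resume: nothing to do */
	puts("one-time setup: update SBs, load bitmap, start reshape");
}

int main(void)
{
	bool flag = false;

	preresume(&flag);	/* runs the setup */
	preresume(&flag);	/* no-op */
	return 0;
}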
+ */ + if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags)) + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + + if (mddev->suspended) + mddev_resume(mddev); + } } static struct target_type raid_target = { .name = "raid", - .version = {1, 8, 0}, + .version = {1, 9, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, @@ -1734,6 +3645,7 @@ static struct target_type raid_target = { .io_hints = raid_io_hints, .presuspend = raid_presuspend, .postsuspend = raid_postsuspend, + .preresume = raid_preresume, .resume = raid_resume, }; @@ -1758,11 +3670,13 @@ module_param(devices_handle_discard_safely, bool, 0644); MODULE_PARM_DESC(devices_handle_discard_safely, "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions"); -MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); +MODULE_DESCRIPTION(DM_NAME " raid0/1/10/4/5/6 target"); +MODULE_ALIAS("dm-raid0"); MODULE_ALIAS("dm-raid1"); MODULE_ALIAS("dm-raid10"); MODULE_ALIAS("dm-raid4"); MODULE_ALIAS("dm-raid5"); MODULE_ALIAS("dm-raid6"); MODULE_AUTHOR("Neil Brown "); +MODULE_AUTHOR("Heinz Mauelshagen "); MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index b3ccf1e0d4f2..dac55b254a09 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -260,7 +260,8 @@ static int mirror_flush(struct dm_target *ti) struct dm_io_region io[ms->nr_mirrors]; struct mirror *m; struct dm_io_request io_req = { - .bi_rw = WRITE_FLUSH, + .bi_op = REQ_OP_WRITE, + .bi_op_flags = WRITE_FLUSH, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = ms->io_client, @@ -527,7 +528,7 @@ static void read_callback(unsigned long error, void *context) DMWARN_LIMIT("Read failure on mirror device %s. " "Trying alternative device.", m->dev->name); - queue_bio(m->ms, bio, bio_rw(bio)); + queue_bio(m->ms, bio, bio_data_dir(bio)); return; } @@ -541,7 +542,8 @@ static void read_async_bio(struct mirror *m, struct bio *bio) { struct dm_io_region io; struct dm_io_request io_req = { - .bi_rw = READ, + .bi_op = REQ_OP_READ, + .bi_op_flags = 0, .mem.type = DM_IO_BIO, .mem.ptr.bio = bio, .notify.fn = read_callback, @@ -624,7 +626,7 @@ static void write_callback(unsigned long error, void *context) * If the bio is discard, return an error, but do not * degrade the array. 
*/ - if (bio->bi_rw & REQ_DISCARD) { + if (bio_op(bio) == REQ_OP_DISCARD) { bio->bi_error = -EOPNOTSUPP; bio_endio(bio); return; @@ -654,7 +656,8 @@ static void do_write(struct mirror_set *ms, struct bio *bio) struct dm_io_region io[ms->nr_mirrors], *dest = io; struct mirror *m; struct dm_io_request io_req = { - .bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA), + .bi_op = REQ_OP_WRITE, + .bi_op_flags = bio->bi_rw & WRITE_FLUSH_FUA, .mem.type = DM_IO_BIO, .mem.ptr.bio = bio, .notify.fn = write_callback, @@ -662,8 +665,8 @@ static void do_write(struct mirror_set *ms, struct bio *bio) .client = ms->io_client, }; - if (bio->bi_rw & REQ_DISCARD) { - io_req.bi_rw |= REQ_DISCARD; + if (bio_op(bio) == REQ_OP_DISCARD) { + io_req.bi_op = REQ_OP_DISCARD; io_req.mem.type = DM_IO_KMEM; io_req.mem.ptr.addr = NULL; } @@ -701,8 +704,8 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) bio_list_init(&requeue); while ((bio = bio_list_pop(writes))) { - if ((bio->bi_rw & REQ_FLUSH) || - (bio->bi_rw & REQ_DISCARD)) { + if ((bio->bi_rw & REQ_PREFLUSH) || + (bio_op(bio) == REQ_OP_DISCARD)) { bio_list_add(&sync, bio); continue; } @@ -1190,7 +1193,7 @@ static void mirror_dtr(struct dm_target *ti) */ static int mirror_map(struct dm_target *ti, struct bio *bio) { - int r, rw = bio_rw(bio); + int r, rw = bio_data_dir(bio); struct mirror *m; struct mirror_set *ms = ti->private; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); @@ -1214,7 +1217,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio) * If region is not in-sync queue the bio. */ if (!r || (r == -EWOULDBLOCK)) { - if (rw == READA) + if (bio->bi_rw & REQ_RAHEAD) return -EWOULDBLOCK; queue_bio(ms, bio, rw); @@ -1239,7 +1242,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio) static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) { - int rw = bio_rw(bio); + int rw = bio_data_dir(bio); struct mirror_set *ms = (struct mirror_set *) ti->private; struct mirror *m = NULL; struct dm_bio_details *bd = NULL; @@ -1250,7 +1253,8 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) * We need to dec pending if this was a write. */ if (rw == WRITE) { - if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) + if (!(bio->bi_rw & REQ_PREFLUSH) && + bio_op(bio) != REQ_OP_DISCARD) dm_rh_dec(ms->rh, bio_record->write_region); return error; } diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 74cb7b991d41..b11813431f31 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -398,12 +398,12 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) region_t region = dm_rh_bio_to_region(rh, bio); int recovering = 0; - if (bio->bi_rw & REQ_FLUSH) { + if (bio->bi_rw & REQ_PREFLUSH) { rh->flush_failure = 1; return; } - if (bio->bi_rw & REQ_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD) return; /* We must inform the log that the sync count has changed. */ @@ -526,7 +526,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) struct bio *bio; for (bio = bios->head; bio; bio = bio->bi_next) { - if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)) + if (bio->bi_rw & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD) continue; rh_inc(rh, dm_rh_bio_to_region(rh, bio)); } diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c new file mode 100644 index 000000000000..7a9661868496 --- /dev/null +++ b/drivers/md/dm-rq.c @@ -0,0 +1,970 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. All rights reserved. + * + * This file is released under the GPL. 
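The dm-raid1 and dm-region-hash hunks above track the 4.8 block-layer interface change: the operation (read/write/discard) moves out of the bi_rw flag word into its own field, flush becomes REQ_PREFLUSH, and dm_io_request grows a bi_op/bi_op_flags pair. A compact model of why the split matters; all names below are illustrative, not the block layer's:

#include <stdio.h>

enum req_op { OP_READ, OP_WRITE, OP_DISCARD };	/* an operation, not a flag */

#define FLAG_PREFLUSH 0x1u
#define FLAG_FUA      0x2u

struct io_request {
	enum req_op op;
	unsigned op_flags;
};

static const char *describe(const struct io_request *req)
{
	if (req->op == OP_DISCARD)	/* was: bi_rw & REQ_DISCARD */
		return "discard";
	return req->op == OP_WRITE ? "write" : "read";
}

int main(void)
{
	struct io_request req = {
		.op = OP_WRITE,
		.op_flags = FLAG_PREFLUSH | FLAG_FUA,	/* modifiers only */
	};

	printf("%s, flags %#x\n", describe(&req), req.op_flags);
	return 0;
}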
+ */ + +#include "dm-core.h" +#include "dm-rq.h" + +#include /* for rq_end_sector() */ +#include + +#define DM_MSG_PREFIX "core-rq" + +#define DM_MQ_NR_HW_QUEUES 1 +#define DM_MQ_QUEUE_DEPTH 2048 +static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES; +static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH; + +/* + * Request-based DM's mempools' reserved IOs set by the user. + */ +#define RESERVED_REQUEST_BASED_IOS 256 +static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS; + +#ifdef CONFIG_DM_MQ_DEFAULT +static bool use_blk_mq = true; +#else +static bool use_blk_mq = false; +#endif + +bool dm_use_blk_mq_default(void) +{ + return use_blk_mq; +} + +bool dm_use_blk_mq(struct mapped_device *md) +{ + return md->use_blk_mq; +} +EXPORT_SYMBOL_GPL(dm_use_blk_mq); + +unsigned dm_get_reserved_rq_based_ios(void) +{ + return __dm_get_module_param(&reserved_rq_based_ios, + RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS); +} +EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios); + +static unsigned dm_get_blk_mq_nr_hw_queues(void) +{ + return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32); +} + +static unsigned dm_get_blk_mq_queue_depth(void) +{ + return __dm_get_module_param(&dm_mq_queue_depth, + DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH); +} + +int dm_request_based(struct mapped_device *md) +{ + return blk_queue_stackable(md->queue); +} + +static void dm_old_start_queue(struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + if (blk_queue_stopped(q)) + blk_start_queue(q); + spin_unlock_irqrestore(q->queue_lock, flags); +} + +void dm_start_queue(struct request_queue *q) +{ + if (!q->mq_ops) + dm_old_start_queue(q); + else { + blk_mq_start_stopped_hw_queues(q, true); + blk_mq_kick_requeue_list(q); + } +} + +static void dm_old_stop_queue(struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + if (blk_queue_stopped(q)) { + spin_unlock_irqrestore(q->queue_lock, flags); + return; + } + + blk_stop_queue(q); + spin_unlock_irqrestore(q->queue_lock, flags); +} + +void dm_stop_queue(struct request_queue *q) +{ + if (!q->mq_ops) + dm_old_stop_queue(q); + else + blk_mq_stop_hw_queues(q); +} + +static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md, + gfp_t gfp_mask) +{ + return mempool_alloc(md->io_pool, gfp_mask); +} + +static void free_old_rq_tio(struct dm_rq_target_io *tio) +{ + mempool_free(tio, tio->md->io_pool); +} + +static struct request *alloc_old_clone_request(struct mapped_device *md, + gfp_t gfp_mask) +{ + return mempool_alloc(md->rq_pool, gfp_mask); +} + +static void free_old_clone_request(struct mapped_device *md, struct request *rq) +{ + mempool_free(rq, md->rq_pool); +} + +/* + * Partial completion handling for request-based dm + */ +static void end_clone_bio(struct bio *clone) +{ + struct dm_rq_clone_bio_info *info = + container_of(clone, struct dm_rq_clone_bio_info, clone); + struct dm_rq_target_io *tio = info->tio; + struct bio *bio = info->orig; + unsigned int nr_bytes = info->orig->bi_iter.bi_size; + int error = clone->bi_error; + + bio_put(clone); + + if (tio->error) + /* + * An error has already been detected on the request. + * Once error occurred, just let clone->end_io() handle + * the remainder. + */ + return; + else if (error) { + /* + * Don't notice the error to the upper layer yet. + * The error handling decision is made by the target driver, + * when the request is completed. + */ + tio->error = error; + return; + } + + /* + * I/O for the bio successfully completed. 
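The dm-rq module parameters above are read through a clamping helper rather than used raw: a zero (unset) value falls back to the built-in default and oversized sysfs writes are capped. A sketch of that pattern under those assumptions; the kernel helper also writes the sanitized value back, which is omitted here.

#include <stdio.h>

static unsigned get_param(unsigned value, unsigned def, unsigned max)
{
	if (!value)
		return def;	/* unset: use the built-in default */
	if (value > max)
		return max;	/* cap runaway values */
	return value;
}

int main(void)
{
	printf("%u\n", get_param(0, 2048, 4096));	/* 2048 */
	printf("%u\n", get_param(100000, 2048, 4096));	/* 4096 */
	return 0;
}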
+ * Notice the data completion to the upper layer. + */ + + /* + * bios are processed from the head of the list. + * So the completing bio should always be rq->bio. + * If it's not, something wrong is happening. + */ + if (tio->orig->bio != bio) + DMERR("bio completion is going in the middle of the request"); + + /* + * Update the original request. + * Do not use blk_end_request() here, because it may complete + * the original request before the clone, and break the ordering. + */ + blk_update_request(tio->orig, 0, nr_bytes); +} + +static struct dm_rq_target_io *tio_from_request(struct request *rq) +{ + return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); +} + +static void rq_end_stats(struct mapped_device *md, struct request *orig) +{ + if (unlikely(dm_stats_used(&md->stats))) { + struct dm_rq_target_io *tio = tio_from_request(orig); + tio->duration_jiffies = jiffies - tio->duration_jiffies; + dm_stats_account_io(&md->stats, rq_data_dir(orig), + blk_rq_pos(orig), tio->n_sectors, true, + tio->duration_jiffies, &tio->stats_aux); + } +} + +/* + * Don't touch any member of the md after calling this function because + * the md may be freed in dm_put() at the end of this function. + * Or do dm_get() before calling this function and dm_put() later. + */ +static void rq_completed(struct mapped_device *md, int rw, bool run_queue) +{ + atomic_dec(&md->pending[rw]); + + /* nudge anyone waiting on suspend queue */ + if (!md_in_flight(md)) + wake_up(&md->wait); + + /* + * Run this off this callpath, as drivers could invoke end_io while + * inside their request_fn (and holding the queue lock). Calling + * back into ->request_fn() could deadlock attempting to grab the + * queue lock again. + */ + if (!md->queue->mq_ops && run_queue) + blk_run_queue_async(md->queue); + + /* + * dm_put() must be at the end of this function. See the comment above + */ + dm_put(md); +} + +static void free_rq_clone(struct request *clone) +{ + struct dm_rq_target_io *tio = clone->end_io_data; + struct mapped_device *md = tio->md; + + blk_rq_unprep_clone(clone); + + /* + * It is possible for a clone_old_rq() allocated clone to + * get passed in -- it may not yet have a request_queue. + * This is known to occur if the error target replaces + * a multipath target that has a request_fn queue stacked + * on blk-mq queue(s). + */ + if (clone->q && clone->q->mq_ops) + /* stacked on blk-mq queue(s) */ + tio->ti->type->release_clone_rq(clone); + else if (!md->queue->mq_ops) + /* request_fn queue stacked on request_fn queue(s) */ + free_old_clone_request(md, clone); + + if (!md->queue->mq_ops) + free_old_rq_tio(tio); +} + +/* + * Complete the clone and the original request. + * Must be called without clone's queue lock held, + * see end_clone_request() for more details. + */ +static void dm_end_request(struct request *clone, int error) +{ + int rw = rq_data_dir(clone); + struct dm_rq_target_io *tio = clone->end_io_data; + struct mapped_device *md = tio->md; + struct request *rq = tio->orig; + + if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { + rq->errors = clone->errors; + rq->resid_len = clone->resid_len; + + if (rq->sense) + /* + * We are using the sense buffer of the original + * request. + * So setting the length of the sense data is enough. 
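end_clone_bio() above implements partial completion for a cloned request: the first bio error is latched and decides the whole request later, while each successful bio advances the original request by its byte count without completing it. The control flow, modeled in user space:

#include <stdio.h>

struct tio {
	int error;		/* first error wins */
	unsigned long done;	/* bytes reported to the original request */
};

static void end_clone_bio(struct tio *tio, int error, unsigned nr_bytes)
{
	if (tio->error)
		return;			/* request already failed */
	if (error) {
		tio->error = error;	/* decide at request completion */
		return;
	}
	tio->done += nr_bytes;		/* blk_update_request() analogue */
}

int main(void)
{
	struct tio t = { 0, 0 };

	end_clone_bio(&t, 0, 4096);
	end_clone_bio(&t, -5, 4096);	/* -EIO latched */
	end_clone_bio(&t, 0, 4096);	/* ignored after the error */
	printf("error=%d done=%lu\n", t.error, t.done);
	return 0;
}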
+ */ + rq->sense_len = clone->sense_len; + } + + free_rq_clone(clone); + rq_end_stats(md, rq); + if (!rq->q->mq_ops) + blk_end_request_all(rq, error); + else + blk_mq_end_request(rq, error); + rq_completed(md, rw, true); +} + +static void dm_unprep_request(struct request *rq) +{ + struct dm_rq_target_io *tio = tio_from_request(rq); + struct request *clone = tio->clone; + + if (!rq->q->mq_ops) { + rq->special = NULL; + rq->cmd_flags &= ~REQ_DONTPREP; + } + + if (clone) + free_rq_clone(clone); + else if (!tio->md->queue->mq_ops) + free_old_rq_tio(tio); +} + +/* + * Requeue the original request of a clone. + */ +static void dm_old_requeue_request(struct request *rq) +{ + struct request_queue *q = rq->q; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + blk_requeue_request(q, rq); + blk_run_queue_async(q); + spin_unlock_irqrestore(q->queue_lock, flags); +} + +static void dm_mq_requeue_request(struct request *rq) +{ + struct request_queue *q = rq->q; + unsigned long flags; + + blk_mq_requeue_request(rq); + spin_lock_irqsave(q->queue_lock, flags); + if (!blk_queue_stopped(q)) + blk_mq_kick_requeue_list(q); + spin_unlock_irqrestore(q->queue_lock, flags); +} + +static void dm_requeue_original_request(struct mapped_device *md, + struct request *rq) +{ + int rw = rq_data_dir(rq); + + rq_end_stats(md, rq); + dm_unprep_request(rq); + + if (!rq->q->mq_ops) + dm_old_requeue_request(rq); + else + dm_mq_requeue_request(rq); + + rq_completed(md, rw, false); +} + +static void dm_done(struct request *clone, int error, bool mapped) +{ + int r = error; + struct dm_rq_target_io *tio = clone->end_io_data; + dm_request_endio_fn rq_end_io = NULL; + + if (tio->ti) { + rq_end_io = tio->ti->type->rq_end_io; + + if (mapped && rq_end_io) + r = rq_end_io(tio->ti, clone, error, &tio->info); + } + + if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) && + !clone->q->limits.max_write_same_sectors)) + disable_write_same(tio->md); + + if (r <= 0) + /* The target wants to complete the I/O */ + dm_end_request(clone, r); + else if (r == DM_ENDIO_INCOMPLETE) + /* The target will handle the I/O */ + return; + else if (r == DM_ENDIO_REQUEUE) + /* The target wants to requeue the I/O */ + dm_requeue_original_request(tio->md, tio->orig); + else { + DMWARN("unimplemented target endio return value: %d", r); + BUG(); + } +} + +/* + * Request completion handler for request-based dm + */ +static void dm_softirq_done(struct request *rq) +{ + bool mapped = true; + struct dm_rq_target_io *tio = tio_from_request(rq); + struct request *clone = tio->clone; + int rw; + + if (!clone) { + rq_end_stats(tio->md, rq); + rw = rq_data_dir(rq); + if (!rq->q->mq_ops) { + blk_end_request_all(rq, tio->error); + rq_completed(tio->md, rw, false); + free_old_rq_tio(tio); + } else { + blk_mq_end_request(rq, tio->error); + rq_completed(tio->md, rw, false); + } + return; + } + + if (rq->cmd_flags & REQ_FAILED) + mapped = false; + + dm_done(clone, tio->error, mapped); +} + +/* + * Complete the clone and the original request with the error status + * through softirq context. + */ +static void dm_complete_request(struct request *rq, int error) +{ + struct dm_rq_target_io *tio = tio_from_request(rq); + + tio->error = error; + if (!rq->q->mq_ops) + blk_complete_request(rq); + else + blk_mq_complete_request(rq, error); +} + +/* + * Complete the not-mapped clone and the original request with the error status + * through softirq context. + * Target's rq_end_io() function isn't called. 
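dm_done() above funnels every completion through one dispatch on the (possibly target-adjusted) status: non-positive values finish the request, DM_ENDIO_INCOMPLETE leaves it to the target, DM_ENDIO_REQUEUE puts the original back on the queue, and anything else is a bug. In outline, with local stand-in constants:

#include <stdio.h>

enum { ENDIO_INCOMPLETE = 1, ENDIO_REQUEUE = 2 };	/* stand-ins */

static const char *dm_done_disposition(int r)
{
	if (r <= 0)
		return "complete request with status r";
	switch (r) {
	case ENDIO_INCOMPLETE:
		return "target will finish the I/O itself";
	case ENDIO_REQUEUE:
		return "requeue the original request";
	default:
		return "BUG: unknown endio return value";
	}
}

int main(void)
{
	printf("%s\n", dm_done_disposition(0));
	printf("%s\n", dm_done_disposition(ENDIO_REQUEUE));
	return 0;
}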
+ * This may be used when the target's map_rq() or clone_and_map_rq() functions fail. + */ +static void dm_kill_unmapped_request(struct request *rq, int error) +{ + rq->cmd_flags |= REQ_FAILED; + dm_complete_request(rq, error); +} + +/* + * Called with the clone's queue lock held (in the case of .request_fn) + */ +static void end_clone_request(struct request *clone, int error) +{ + struct dm_rq_target_io *tio = clone->end_io_data; + + if (!clone->q->mq_ops) { + /* + * For just cleaning up the information of the queue in which + * the clone was dispatched. + * The clone is *NOT* freed actually here because it is alloced + * from dm own mempool (REQ_ALLOCED isn't set). + */ + __blk_put_request(clone->q, clone); + } + + /* + * Actual request completion is done in a softirq context which doesn't + * hold the clone's queue lock. Otherwise, deadlock could occur because: + * - another request may be submitted by the upper level driver + * of the stacking during the completion + * - the submission which requires queue lock may be done + * against this clone's queue + */ + dm_complete_request(tio->orig, error); +} + +static void dm_dispatch_clone_request(struct request *clone, struct request *rq) +{ + int r; + + if (blk_queue_io_stat(clone->q)) + clone->cmd_flags |= REQ_IO_STAT; + + clone->start_time = jiffies; + r = blk_insert_cloned_request(clone->q, clone); + if (r) + /* must complete clone in terms of original request */ + dm_complete_request(rq, r); +} + +static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, + void *data) +{ + struct dm_rq_target_io *tio = data; + struct dm_rq_clone_bio_info *info = + container_of(bio, struct dm_rq_clone_bio_info, clone); + + info->orig = bio_orig; + info->tio = tio; + bio->bi_end_io = end_clone_bio; + + return 0; +} + +static int setup_clone(struct request *clone, struct request *rq, + struct dm_rq_target_io *tio, gfp_t gfp_mask) +{ + int r; + + r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask, + dm_rq_bio_constructor, tio); + if (r) + return r; + + clone->cmd = rq->cmd; + clone->cmd_len = rq->cmd_len; + clone->sense = rq->sense; + clone->end_io = end_clone_request; + clone->end_io_data = tio; + + tio->clone = clone; + + return 0; +} + +static struct request *clone_old_rq(struct request *rq, struct mapped_device *md, + struct dm_rq_target_io *tio, gfp_t gfp_mask) +{ + /* + * Create clone for use with .request_fn request_queue + */ + struct request *clone; + + clone = alloc_old_clone_request(md, gfp_mask); + if (!clone) + return NULL; + + blk_rq_init(NULL, clone); + if (setup_clone(clone, rq, tio, gfp_mask)) { + /* -ENOMEM */ + free_old_clone_request(md, clone); + return NULL; + } + + return clone; +} + +static void map_tio_request(struct kthread_work *work); + +static void init_tio(struct dm_rq_target_io *tio, struct request *rq, + struct mapped_device *md) +{ + tio->md = md; + tio->ti = NULL; + tio->clone = NULL; + tio->orig = rq; + tio->error = 0; + /* + * Avoid initializing info for blk-mq; it passes + * target-specific data through info.ptr + * (see: dm_mq_init_request) + */ + if (!md->init_tio_pdu) + memset(&tio->info, 0, sizeof(tio->info)); + if (md->kworker_task) + init_kthread_work(&tio->work, map_tio_request); +} + +static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq, + struct mapped_device *md, + gfp_t gfp_mask) +{ + struct dm_rq_target_io *tio; + int srcu_idx; + struct dm_table *table; + + tio = alloc_old_rq_tio(md, gfp_mask); + if (!tio) + return NULL; + + init_tio(tio, rq, md); + + table = 
dm_get_live_table(md, &srcu_idx); + /* + * Must clone a request if this .request_fn DM device + * is stacked on .request_fn device(s). + */ + if (!dm_table_all_blk_mq_devices(table)) { + if (!clone_old_rq(rq, md, tio, gfp_mask)) { + dm_put_live_table(md, srcu_idx); + free_old_rq_tio(tio); + return NULL; + } + } + dm_put_live_table(md, srcu_idx); + + return tio; +} + +/* + * Called with the queue lock held. + */ +static int dm_old_prep_fn(struct request_queue *q, struct request *rq) +{ + struct mapped_device *md = q->queuedata; + struct dm_rq_target_io *tio; + + if (unlikely(rq->special)) { + DMWARN("Already has something in rq->special."); + return BLKPREP_KILL; + } + + tio = dm_old_prep_tio(rq, md, GFP_ATOMIC); + if (!tio) + return BLKPREP_DEFER; + + rq->special = tio; + rq->cmd_flags |= REQ_DONTPREP; + + return BLKPREP_OK; +} + +/* + * Returns: + * 0 : the request has been processed + * DM_MAPIO_REQUEUE : the original request needs to be requeued + * < 0 : the request was completed due to failure + */ +static int map_request(struct dm_rq_target_io *tio, struct request *rq, + struct mapped_device *md) +{ + int r; + struct dm_target *ti = tio->ti; + struct request *clone = NULL; + + if (tio->clone) { + clone = tio->clone; + r = ti->type->map_rq(ti, clone, &tio->info); + } else { + r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone); + if (r < 0) { + /* The target wants to complete the I/O */ + dm_kill_unmapped_request(rq, r); + return r; + } + if (r != DM_MAPIO_REMAPPED) + return r; + if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { + /* -ENOMEM */ + ti->type->release_clone_rq(clone); + return DM_MAPIO_REQUEUE; + } + } + + switch (r) { + case DM_MAPIO_SUBMITTED: + /* The target has taken the I/O to submit by itself later */ + break; + case DM_MAPIO_REMAPPED: + /* The target has remapped the I/O so dispatch it */ + trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), + blk_rq_pos(rq)); + dm_dispatch_clone_request(clone, rq); + break; + case DM_MAPIO_REQUEUE: + /* The target wants to requeue the I/O */ + dm_requeue_original_request(md, tio->orig); + break; + default: + if (r > 0) { + DMWARN("unimplemented target map return value: %d", r); + BUG(); + } + + /* The target wants to complete the I/O */ + dm_kill_unmapped_request(rq, r); + return r; + } + + return 0; +} + +static void dm_start_request(struct mapped_device *md, struct request *orig) +{ + if (!orig->q->mq_ops) + blk_start_request(orig); + else + blk_mq_start_request(orig); + atomic_inc(&md->pending[rq_data_dir(orig)]); + + if (md->seq_rq_merge_deadline_usecs) { + md->last_rq_pos = rq_end_sector(orig); + md->last_rq_rw = rq_data_dir(orig); + md->last_rq_start_time = ktime_get(); + } + + if (unlikely(dm_stats_used(&md->stats))) { + struct dm_rq_target_io *tio = tio_from_request(orig); + tio->duration_jiffies = jiffies; + tio->n_sectors = blk_rq_sectors(orig); + dm_stats_account_io(&md->stats, rq_data_dir(orig), + blk_rq_pos(orig), tio->n_sectors, false, 0, + &tio->stats_aux); + } + + /* + * Hold the md reference here for the in-flight I/O. + * We can't rely on the reference count by device opener, + * because the device may be closed during the request completion + * when all bios are completed. + * See the comment in rq_completed() too. 
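map_request() above has four dispositions: the target already submitted the clone (SUBMITTED), remapped it for dispatch (REMAPPED), asked for a requeue (REQUEUE), or failed, in which case the unmapped original is killed. A condensed model of that switch, with illustrative constants:

#include <stdio.h>

enum { MAPIO_SUBMITTED = 0, MAPIO_REMAPPED = 1, MAPIO_REQUEUE = 2 };

static int map_request(int target_verdict)
{
	if (target_verdict < 0) {
		puts("kill unmapped original request");
		return target_verdict;
	}
	switch (target_verdict) {
	case MAPIO_SUBMITTED:
		puts("target submitted the clone itself");
		break;
	case MAPIO_REMAPPED:
		puts("dispatch clone to the lower queue");
		break;
	case MAPIO_REQUEUE:
		puts("requeue the original request");
		break;
	default:
		puts("BUG: unknown map return value");
		break;
	}
	return 0;
}

int main(void)
{
	map_request(MAPIO_REMAPPED);
	map_request(-5);
	return 0;
}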
+ */ + dm_get(md); +} + +static void map_tio_request(struct kthread_work *work) +{ + struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work); + struct request *rq = tio->orig; + struct mapped_device *md = tio->md; + + if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) + dm_requeue_original_request(md, rq); +} + +ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf) +{ + return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs); +} + +#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000 + +ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, + const char *buf, size_t count) +{ + unsigned deadline; + + if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED) + return count; + + if (kstrtouint(buf, 10, &deadline)) + return -EINVAL; + + if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS) + deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS; + + md->seq_rq_merge_deadline_usecs = deadline; + + return count; +} + +static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md) +{ + ktime_t kt_deadline; + + if (!md->seq_rq_merge_deadline_usecs) + return false; + + kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC); + kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline); + + return !ktime_after(ktime_get(), kt_deadline); +} + +/* + * q->request_fn for old request-based dm. + * Called with the queue lock held. + */ +static void dm_old_request_fn(struct request_queue *q) +{ + struct mapped_device *md = q->queuedata; + struct dm_target *ti = md->immutable_target; + struct request *rq; + struct dm_rq_target_io *tio; + sector_t pos = 0; + + if (unlikely(!ti)) { + int srcu_idx; + struct dm_table *map = dm_get_live_table(md, &srcu_idx); + + ti = dm_table_find_target(map, pos); + dm_put_live_table(md, srcu_idx); + } + + /* + * For suspend, check blk_queue_stopped() and increment + * ->pending within a single queue_lock not to increment the + * number of in-flight I/Os after the queue is stopped in + * dm_suspend(). + */ + while (!blk_queue_stopped(q)) { + rq = blk_peek_request(q); + if (!rq) + return; + + /* always use block 0 to find the target for flushes for now */ + pos = 0; + if (req_op(rq) != REQ_OP_FLUSH) + pos = blk_rq_pos(rq); + + if ((dm_old_request_peeked_before_merge_deadline(md) && + md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 && + md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) || + (ti->type->busy && ti->type->busy(ti))) { + blk_delay_queue(q, 10); + return; + } + + dm_start_request(md, rq); + + tio = tio_from_request(rq); + /* Establish tio->ti before queuing work (map_tio_request) */ + tio->ti = ti; + queue_kthread_work(&md->kworker, &tio->work); + BUG_ON(!irqs_disabled()); + } +} + +/* + * Fully initialize a .request_fn request-based queue. 
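dm_old_request_fn() above applies the seq_rq_merge_deadline heuristic: a single-bvec request that extends the previous one in the same direction is briefly delayed, giving the elevator a chance to merge it. The deadline test in isolation, as a sketch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool before_merge_deadline(uint64_t now_ns, uint64_t last_start_ns,
				  unsigned deadline_us)
{
	if (!deadline_us)
		return false;	/* heuristic disabled (the default) */
	return now_ns < last_start_ns + (uint64_t)deadline_us * 1000;
}

int main(void)
{
	/* request peeked 50us after the previous start, 100us deadline */
	printf("%d\n", before_merge_deadline(1050000, 1000000, 100)); /* 1 */
	printf("%d\n", before_merge_deadline(1200000, 1000000, 100)); /* 0 */
	return 0;
}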
+ */ +int dm_old_init_request_queue(struct mapped_device *md) +{ + /* Fully initialize the queue */ + if (!blk_init_allocated_queue(md->queue, dm_old_request_fn, NULL)) + return -EINVAL; + + /* disable dm_old_request_fn's merge heuristic by default */ + md->seq_rq_merge_deadline_usecs = 0; + + dm_init_normal_md_queue(md); + blk_queue_softirq_done(md->queue, dm_softirq_done); + blk_queue_prep_rq(md->queue, dm_old_prep_fn); + + /* Initialize the request-based DM worker thread */ + init_kthread_worker(&md->kworker); + md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, + "kdmwork-%s", dm_device_name(md)); + if (IS_ERR(md->kworker_task)) + return PTR_ERR(md->kworker_task); + + elv_register_queue(md->queue); + + return 0; +} + +static int dm_mq_init_request(void *data, struct request *rq, + unsigned int hctx_idx, unsigned int request_idx, + unsigned int numa_node) +{ + struct mapped_device *md = data; + struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); + + /* + * Must initialize md member of tio, otherwise it won't + * be available in dm_mq_queue_rq. + */ + tio->md = md; + + if (md->init_tio_pdu) { + /* target-specific per-io data is immediately after the tio */ + tio->info.ptr = tio + 1; + } + + return 0; +} + +static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct request *rq = bd->rq; + struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); + struct mapped_device *md = tio->md; + struct dm_target *ti = md->immutable_target; + + if (unlikely(!ti)) { + int srcu_idx; + struct dm_table *map = dm_get_live_table(md, &srcu_idx); + + ti = dm_table_find_target(map, 0); + dm_put_live_table(md, srcu_idx); + } + + if (ti->type->busy && ti->type->busy(ti)) + return BLK_MQ_RQ_QUEUE_BUSY; + + dm_start_request(md, rq); + + /* Init tio using md established in .init_request */ + init_tio(tio, rq, md); + + /* + * Establish tio->ti before calling map_request(). 
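dm_mq_init_request() above relies on a layout contract: when the table's immutable target needs per-io data, it is allocated immediately after struct dm_rq_target_io in the blk-mq PDU (dm_mq_init_request_queue() in the next hunk grows cmd_size accordingly), so tio->info.ptr is simply tio + 1. The layout, modeled with plain calloc:

#include <stdio.h>
#include <stdlib.h>

struct tio {
	int error;
	void *per_io;	/* info.ptr analogue */
};

int main(void)
{
	size_t per_io_data_size = 64;	/* from the immutable target */
	size_t cmd_size = sizeof(struct tio) + per_io_data_size;
	struct tio *tio = calloc(1, cmd_size);	/* blk-mq allocates the PDU */

	if (!tio)
		return 1;
	tio->per_io = tio + 1;		/* data sits right after the tio */
	printf("pdu=%zu bytes, per-io data at +%td\n", cmd_size,
	       (char *)tio->per_io - (char *)tio);
	free(tio);
	return 0;
}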
+ */ + tio->ti = ti; + + /* Direct call is fine since .queue_rq allows allocations */ + if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) { + /* Undo dm_start_request() before requeuing */ + rq_end_stats(md, rq); + rq_completed(md, rq_data_dir(rq), false); + return BLK_MQ_RQ_QUEUE_BUSY; + } + + return BLK_MQ_RQ_QUEUE_OK; +} + +static struct blk_mq_ops dm_mq_ops = { + .queue_rq = dm_mq_queue_rq, + .map_queue = blk_mq_map_queue, + .complete = dm_softirq_done, + .init_request = dm_mq_init_request, +}; + +int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) +{ + struct request_queue *q; + struct dm_target *immutable_tgt; + int err; + + if (!dm_table_all_blk_mq_devices(t)) { + DMERR("request-based dm-mq may only be stacked on blk-mq device(s)"); + return -EINVAL; + } + + md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id); + if (!md->tag_set) + return -ENOMEM; + + md->tag_set->ops = &dm_mq_ops; + md->tag_set->queue_depth = dm_get_blk_mq_queue_depth(); + md->tag_set->numa_node = md->numa_node_id; + md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; + md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues(); + md->tag_set->driver_data = md; + + md->tag_set->cmd_size = sizeof(struct dm_rq_target_io); + immutable_tgt = dm_table_get_immutable_target(t); + if (immutable_tgt && immutable_tgt->per_io_data_size) { + /* any target-specific per-io data is immediately after the tio */ + md->tag_set->cmd_size += immutable_tgt->per_io_data_size; + md->init_tio_pdu = true; + } + + err = blk_mq_alloc_tag_set(md->tag_set); + if (err) + goto out_kfree_tag_set; + + q = blk_mq_init_allocated_queue(md->tag_set, md->queue); + if (IS_ERR(q)) { + err = PTR_ERR(q); + goto out_tag_set; + } + dm_init_md_queue(md); + + /* backfill 'mq' sysfs registration normally done in blk_register_queue */ + blk_mq_register_disk(md->disk); + + return 0; + +out_tag_set: + blk_mq_free_tag_set(md->tag_set); +out_kfree_tag_set: + kfree(md->tag_set); + + return err; +} + +void dm_mq_cleanup_mapped_device(struct mapped_device *md) +{ + if (md->tag_set) { + blk_mq_free_tag_set(md->tag_set); + kfree(md->tag_set); + } +} + +module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"); + +module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices"); + +module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices"); + +module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices"); diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h new file mode 100644 index 000000000000..9e6f0a3773d4 --- /dev/null +++ b/drivers/md/dm-rq.h @@ -0,0 +1,64 @@ +/* + * Internal header file for device mapper + * + * Copyright (C) 2016 Red Hat, Inc. All rights reserved. + * + * This file is released under the LGPL. + */ + +#ifndef DM_RQ_INTERNAL_H +#define DM_RQ_INTERNAL_H + +#include +#include + +#include "dm-stats.h" + +struct mapped_device; + +/* + * One of these is allocated per request. 
+ */ +struct dm_rq_target_io { + struct mapped_device *md; + struct dm_target *ti; + struct request *orig, *clone; + struct kthread_work work; + int error; + union map_info info; + struct dm_stats_aux stats_aux; + unsigned long duration_jiffies; + unsigned n_sectors; +}; + +/* + * For request-based dm - the bio clones we allocate are embedded in these + * structs. + * + * We allocate these with bio_alloc_bioset, using the front_pad parameter when + * the bioset is created - this means the bio has to come at the end of the + * struct. + */ +struct dm_rq_clone_bio_info { + struct bio *orig; + struct dm_rq_target_io *tio; + struct bio clone; +}; + +bool dm_use_blk_mq_default(void); +bool dm_use_blk_mq(struct mapped_device *md); + +int dm_old_init_request_queue(struct mapped_device *md); +int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t); +void dm_mq_cleanup_mapped_device(struct mapped_device *md); + +void dm_start_queue(struct request_queue *q); +void dm_stop_queue(struct request_queue *q); + +unsigned dm_get_reserved_rq_based_ios(void); + +ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf); +ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, + const char *buf, size_t count); + +#endif diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 4d3909393f2c..b8cf956b577b 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -226,8 +226,8 @@ static void do_metadata(struct work_struct *work) /* * Read or write a chunk aligned and sized block of data from a device. */ -static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw, - int metadata) +static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op, + int op_flags, int metadata) { struct dm_io_region where = { .bdev = dm_snap_cow(ps->store->snap)->bdev, @@ -235,7 +235,8 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw, .count = ps->store->chunk_size, }; struct dm_io_request io_req = { - .bi_rw = rw, + .bi_op = op, + .bi_op_flags = op_flags, .mem.type = DM_IO_VMA, .mem.ptr.vma = area, .client = ps->io_client, @@ -281,14 +282,14 @@ static void skip_metadata(struct pstore *ps) * Read or write a metadata area. Remembering to skip the first * chunk which holds the header. 
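struct dm_rq_clone_bio_info above depends on the bioset front_pad trick its comment describes: the clone bio is the last member, allocated with enough headroom that container_of() recovers the wrapper from the bio pointer. The round trip in user space:

#include <stddef.h>
#include <stdio.h>

struct bio { int dummy; };

struct clone_bio_info {
	struct bio *orig;
	void *tio;
	struct bio clone;	/* must come last (front_pad allocation) */
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct clone_bio_info info = { 0 };
	struct bio *clone = &info.clone;
	struct clone_bio_info *back =
		container_of(clone, struct clone_bio_info, clone);

	printf("round trip ok: %d\n", back == &info);
	return 0;
}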
*/ -static int area_io(struct pstore *ps, int rw) +static int area_io(struct pstore *ps, int op, int op_flags) { int r; chunk_t chunk; chunk = area_location(ps, ps->current_area); - r = chunk_io(ps, ps->area, chunk, rw, 0); + r = chunk_io(ps, ps->area, chunk, op, op_flags, 0); if (r) return r; @@ -302,7 +303,8 @@ static void zero_memory_area(struct pstore *ps) static int zero_disk_area(struct pstore *ps, chunk_t area) { - return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0); + return chunk_io(ps, ps->zero_area, area_location(ps, area), + REQ_OP_WRITE, 0, 0); } static int read_header(struct pstore *ps, int *new_snapshot) @@ -334,7 +336,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) if (r) return r; - r = chunk_io(ps, ps->header_area, 0, READ, 1); + r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 0, 1); if (r) goto bad; @@ -395,7 +397,7 @@ static int write_header(struct pstore *ps) dh->version = cpu_to_le32(ps->version); dh->chunk_size = cpu_to_le32(ps->store->chunk_size); - return chunk_io(ps, ps->header_area, 0, WRITE, 1); + return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 0, 1); } /* @@ -739,7 +741,7 @@ static void persistent_commit_exception(struct dm_exception_store *store, /* * Commit exceptions to disk. */ - if (ps->valid && area_io(ps, WRITE_FLUSH_FUA)) + if (ps->valid && area_io(ps, REQ_OP_WRITE, WRITE_FLUSH_FUA)) ps->valid = 0; /* @@ -779,7 +781,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store, return 0; ps->current_area--; - r = area_io(ps, READ); + r = area_io(ps, REQ_OP_READ, 0); if (r < 0) return r; ps->current_committed = ps->exceptions_per_area; @@ -816,7 +818,7 @@ static int persistent_commit_merge(struct dm_exception_store *store, for (i = 0; i < nr_merged; i++) clear_exception(ps, ps->current_committed - 1 - i); - r = area_io(ps, WRITE_FLUSH_FUA); + r = area_io(ps, REQ_OP_WRITE, WRITE_FLUSH_FUA); if (r < 0) return r; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 70bb0e8b62ce..731e1f5bd895 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1680,7 +1680,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) init_tracked_chunk(bio); - if (bio->bi_rw & REQ_FLUSH) { + if (bio->bi_rw & REQ_PREFLUSH) { bio->bi_bdev = s->cow->bdev; return DM_MAPIO_REMAPPED; } @@ -1696,7 +1696,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) * to copy an exception */ down_write(&s->lock); - if (!s->valid || (unlikely(s->snapshot_overflowed) && bio_rw(bio) == WRITE)) { + if (!s->valid || (unlikely(s->snapshot_overflowed) && + bio_data_dir(bio) == WRITE)) { r = -EIO; goto out_unlock; } @@ -1713,7 +1714,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) * flags so we should only get this if we are * writeable. 
*/ - if (bio_rw(bio) == WRITE) { + if (bio_data_dir(bio) == WRITE) { pe = __lookup_pending_exception(s, chunk); if (!pe) { up_write(&s->lock); @@ -1799,7 +1800,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) init_tracked_chunk(bio); - if (bio->bi_rw & REQ_FLUSH) { + if (bio->bi_rw & REQ_PREFLUSH) { if (!dm_bio_get_target_bio_nr(bio)) bio->bi_bdev = s->origin->bdev; else @@ -1819,7 +1820,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) e = dm_lookup_exception(&s->complete, chunk); if (e) { /* Queue writes overlapping with chunks being merged */ - if (bio_rw(bio) == WRITE && + if (bio_data_dir(bio) == WRITE && chunk >= s->first_merging_chunk && chunk < (s->first_merging_chunk + s->num_merging_chunks)) { @@ -1831,7 +1832,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) remap_exception(s, e, bio, chunk); - if (bio_rw(bio) == WRITE) + if (bio_data_dir(bio) == WRITE) track_chunk(s, bio, chunk); goto out_unlock; } @@ -1839,7 +1840,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) redirect_to_origin: bio->bi_bdev = s->origin->bdev; - if (bio_rw(bio) == WRITE) { + if (bio_data_dir(bio) == WRITE) { up_write(&s->lock); return do_origin(s->origin, bio); } @@ -2285,10 +2286,10 @@ static int origin_map(struct dm_target *ti, struct bio *bio) bio->bi_bdev = o->dev->bdev; - if (unlikely(bio->bi_rw & REQ_FLUSH)) + if (unlikely(bio->bi_rw & REQ_PREFLUSH)) return DM_MAPIO_REMAPPED; - if (bio_rw(bio) != WRITE) + if (bio_data_dir(bio) != WRITE) return DM_MAPIO_REMAPPED; available_sectors = o->split_boundary - @@ -2301,6 +2302,13 @@ static int origin_map(struct dm_target *ti, struct bio *bio) return do_origin(o->dev, bio); } +static long origin_direct_access(struct dm_target *ti, sector_t sector, + void __pmem **kaddr, pfn_t *pfn, long size) +{ + DMWARN("device does not support dax."); + return -EIO; +} + /* * Set the target "max_io_len" field to the minimum of all the snapshots' * chunk sizes. 
@@ -2360,6 +2368,7 @@ static struct target_type origin_target = { .postsuspend = origin_postsuspend, .status = origin_status, .iterate_devices = origin_iterate_devices, + .direct_access = origin_direct_access, }; static struct target_type snapshot_target = { diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 8289804ccd99..38b05f23b96c 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -10,7 +10,7 @@ #include #include -#include "dm.h" +#include "dm-core.h" #include "dm-stats.h" #define DM_MSG_PREFIX "stats" @@ -514,11 +514,10 @@ static void dm_stat_round(struct dm_stat *s, struct dm_stat_shared *shared, } static void dm_stat_for_entry(struct dm_stat *s, size_t entry, - unsigned long bi_rw, sector_t len, + int idx, sector_t len, struct dm_stats_aux *stats_aux, bool end, unsigned long duration_jiffies) { - unsigned long idx = bi_rw & REQ_WRITE; struct dm_stat_shared *shared = &s->stat_shared[entry]; struct dm_stat_percpu *p; @@ -584,7 +583,7 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry, #endif } -static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw, +static void __dm_stat_bio(struct dm_stat *s, int bi_rw, sector_t bi_sector, sector_t end_sector, bool end, unsigned long duration_jiffies, struct dm_stats_aux *stats_aux) @@ -645,8 +644,8 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, last = raw_cpu_ptr(stats->last); stats_aux->merged = (bi_sector == (ACCESS_ONCE(last->last_sector) && - ((bi_rw & (REQ_WRITE | REQ_DISCARD)) == - (ACCESS_ONCE(last->last_rw) & (REQ_WRITE | REQ_DISCARD))) + ((bi_rw == WRITE) == + (ACCESS_ONCE(last->last_rw) == WRITE)) )); ACCESS_ONCE(last->last_sector) = end_sector; ACCESS_ONCE(last->last_rw) = bi_rw; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 797ddb900b06..01bb9cf2a8c2 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -286,14 +286,14 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) uint32_t stripe; unsigned target_bio_nr; - if (bio->bi_rw & REQ_FLUSH) { + if (bio->bi_rw & REQ_PREFLUSH) { target_bio_nr = dm_bio_get_target_bio_nr(bio); BUG_ON(target_bio_nr >= sc->stripes); bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev; return DM_MAPIO_REMAPPED; } - if (unlikely(bio->bi_rw & REQ_DISCARD) || - unlikely(bio->bi_rw & REQ_WRITE_SAME)) { + if (unlikely(bio_op(bio) == REQ_OP_DISCARD) || + unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) { target_bio_nr = dm_bio_get_target_bio_nr(bio); BUG_ON(target_bio_nr >= sc->stripes); return stripe_map_range(sc, bio, target_bio_nr); @@ -308,6 +308,29 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } +static long stripe_direct_access(struct dm_target *ti, sector_t sector, + void __pmem **kaddr, pfn_t *pfn, long size) +{ + struct stripe_c *sc = ti->private; + uint32_t stripe; + struct block_device *bdev; + struct blk_dax_ctl dax = { + .size = size, + }; + long ret; + + stripe_map_sector(sc, sector, &stripe, &dax.sector); + + dax.sector += sc->stripe[stripe].physical_start; + bdev = sc->stripe[stripe].dev->bdev; + + ret = bdev_direct_access(bdev, &dax); + *kaddr = dax.addr; + *pfn = dax.pfn; + + return ret; +} + /* * Stripe status: * @@ -416,7 +439,7 @@ static void stripe_io_hints(struct dm_target *ti, static struct target_type stripe_target = { .name = "striped", - .version = {1, 5, 1}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, @@ -425,6 +448,7 @@ static struct target_type stripe_target = { .status = stripe_status, 
.iterate_devices = stripe_iterate_devices, .io_hints = stripe_io_hints, + .direct_access = stripe_direct_access, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c index 7e818f5f1dc4..c209b8a19b84 100644 --- a/drivers/md/dm-sysfs.c +++ b/drivers/md/dm-sysfs.c @@ -6,7 +6,8 @@ #include #include -#include "dm.h" +#include "dm-core.h" +#include "dm-rq.h" struct dm_sysfs_attr { struct attribute attr; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 626a5ec04466..3e407a9cde1f 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -5,7 +5,7 @@ * This file is released under the GPL. */ -#include "dm.h" +#include "dm-core.h" #include #include @@ -43,8 +43,10 @@ struct dm_table { struct dm_target *targets; struct target_type *immutable_target_type; - unsigned integrity_supported:1; - unsigned singleton:1; + + bool integrity_supported:1; + bool singleton:1; + bool all_blk_mq:1; /* * Indicates the rw permissions for the new logical @@ -206,6 +208,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, return -ENOMEM; } + t->type = DM_TYPE_NONE; t->mode = mode; t->md = md; *result = t; @@ -703,7 +706,7 @@ int dm_table_add_target(struct dm_table *t, const char *type, dm_device_name(t->md), type); return -EINVAL; } - t->singleton = 1; + t->singleton = true; } if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) { @@ -824,22 +827,70 @@ void dm_consume_args(struct dm_arg_set *as, unsigned num_args) } EXPORT_SYMBOL(dm_consume_args); +static bool __table_type_bio_based(unsigned table_type) +{ + return (table_type == DM_TYPE_BIO_BASED || + table_type == DM_TYPE_DAX_BIO_BASED); +} + static bool __table_type_request_based(unsigned table_type) { return (table_type == DM_TYPE_REQUEST_BASED || table_type == DM_TYPE_MQ_REQUEST_BASED); } -static int dm_table_set_type(struct dm_table *t) +void dm_table_set_type(struct dm_table *t, unsigned type) +{ + t->type = type; +} +EXPORT_SYMBOL_GPL(dm_table_set_type); + +static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && blk_queue_dax(q); +} + +static bool dm_table_supports_dax(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i = 0; + + /* Ensure that all targets support DAX. 
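
/*
 * A minimal stand-alone sketch, not part of this series. The DAX plumbing
 * added above is all-or-nothing: dm_table_supports_dax() only holds when
 * every target implements .direct_access and iterate_devices() confirms
 * that every underlying queue is DAX-capable. The struct and function
 * names below are illustrative, not kernel API.
 */
#include <stdbool.h>
#include <stddef.h>

struct toy_target {
	bool has_direct_access;	/* models ti->type->direct_access != NULL */
	bool device_is_dax;	/* models blk_queue_dax(bdev_get_queue(...)) */
};

static bool toy_table_supports_dax(const struct toy_target *tgt, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (!tgt[i].has_direct_access || !tgt[i].device_is_dax)
			return false;	/* one non-DAX leg poisons the table */
	return true;
}

int main(void)
{
	struct toy_target table[2] = {
		{ .has_direct_access = true, .device_is_dax = true  },
		{ .has_direct_access = true, .device_is_dax = false },
	};

	/* the second leg is not DAX-capable, so the table cannot be DAX */
	return toy_table_supports_dax(table, 2) ? 1 : 0;
}
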
*/ + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!ti->type->direct_access) + return false; + + if (!ti->type->iterate_devices || + !ti->type->iterate_devices(ti, device_supports_dax, NULL)) + return false; + } + + return true; +} + +static int dm_table_determine_type(struct dm_table *t) { unsigned i; unsigned bio_based = 0, request_based = 0, hybrid = 0; - bool use_blk_mq = false; + bool verify_blk_mq = false; struct dm_target *tgt; struct dm_dev_internal *dd; - struct list_head *devices; + struct list_head *devices = dm_table_get_devices(t); unsigned live_md_type = dm_get_md_type(t->md); + if (t->type != DM_TYPE_NONE) { + /* target already set the table's type */ + if (t->type == DM_TYPE_BIO_BASED) + return 0; + BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED); + goto verify_rq_based; + } + for (i = 0; i < t->num_targets; i++) { tgt = t->targets + i; if (dm_target_hybrid(tgt)) @@ -871,11 +922,27 @@ static int dm_table_set_type(struct dm_table *t) if (bio_based) { /* We must use this table as bio-based */ t->type = DM_TYPE_BIO_BASED; + if (dm_table_supports_dax(t) || + (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) + t->type = DM_TYPE_DAX_BIO_BASED; return 0; } BUG_ON(!request_based); /* No targets in this table */ + if (list_empty(devices) && __table_type_request_based(live_md_type)) { + /* inherit live MD type */ + t->type = live_md_type; + return 0; + } + + /* + * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by + * having a compatible target use dm_table_set_type. + */ + t->type = DM_TYPE_REQUEST_BASED; + +verify_rq_based: /* * Request-based dm supports only tables that have a single target now. * To support multiple targets, request splitting support is needed, @@ -888,7 +955,6 @@ static int dm_table_set_type(struct dm_table *t) } /* Non-request-stackable devices can't be used for request-based dm */ - devices = dm_table_get_devices(t); list_for_each_entry(dd, devices, list) { struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev); @@ -899,10 +965,10 @@ static int dm_table_set_type(struct dm_table *t) } if (q->mq_ops) - use_blk_mq = true; + verify_blk_mq = true; } - if (use_blk_mq) { + if (verify_blk_mq) { /* verify _all_ devices in the table are blk-mq devices */ list_for_each_entry(dd, devices, list) if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) { @@ -910,14 +976,9 @@ static int dm_table_set_type(struct dm_table *t) " are blk-mq request-stackable"); return -EINVAL; } - t->type = DM_TYPE_MQ_REQUEST_BASED; - } else if (list_empty(devices) && __table_type_request_based(live_md_type)) { - /* inherit live MD type */ - t->type = live_md_type; - - } else - t->type = DM_TYPE_REQUEST_BASED; + t->all_blk_mq = true; + } return 0; } @@ -956,14 +1017,19 @@ struct dm_target *dm_table_get_wildcard_target(struct dm_table *t) return NULL; } +bool dm_table_bio_based(struct dm_table *t) +{ + return __table_type_bio_based(dm_table_get_type(t)); +} + bool dm_table_request_based(struct dm_table *t) { return __table_type_request_based(dm_table_get_type(t)); } -bool dm_table_mq_request_based(struct dm_table *t) +bool dm_table_all_blk_mq_devices(struct dm_table *t) { - return dm_table_get_type(t) == DM_TYPE_MQ_REQUEST_BASED; + return t->all_blk_mq; } static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md) @@ -978,7 +1044,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * return -EINVAL; } - if (type == DM_TYPE_BIO_BASED) + if (__table_type_bio_based(type)) for (i = 0; i < 
t->num_targets; i++) { tgt = t->targets + i; per_io_data_size = max(per_io_data_size, tgt->per_io_data_size); @@ -1106,7 +1172,7 @@ static int dm_table_register_integrity(struct dm_table *t) return 0; if (!integrity_profile_exists(dm_disk(md))) { - t->integrity_supported = 1; + t->integrity_supported = true; /* * Register integrity profile during table load; we can do * this because the final profile must match during resume. @@ -1129,7 +1195,7 @@ static int dm_table_register_integrity(struct dm_table *t) } /* Preserve existing integrity profile */ - t->integrity_supported = 1; + t->integrity_supported = true; return 0; } @@ -1141,9 +1207,9 @@ int dm_table_complete(struct dm_table *t) { int r; - r = dm_table_set_type(t); + r = dm_table_determine_type(t); if (r) { - DMERR("unable to set table type"); + DMERR("unable to determine table type"); return r; } diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index a317dd884ba6..6eecd6b36f76 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -4,7 +4,7 @@ * This file is released under the GPL. */ -#include "dm.h" +#include "dm-core.h" #include #include @@ -148,9 +148,15 @@ static void io_err_release_clone_rq(struct request *clone) { } +static long io_err_direct_access(struct dm_target *ti, sector_t sector, + void __pmem **kaddr, pfn_t *pfn, long size) +{ + return -EIO; +} + static struct target_type error_target = { .name = "error", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .features = DM_TARGET_WILDCARD, .ctr = io_err_ctr, .dtr = io_err_dtr, @@ -158,6 +164,7 @@ static struct target_type error_target = { .map_rq = io_err_map_rq, .clone_and_map_rq = io_err_clone_and_map_rq, .release_clone_rq = io_err_release_clone_rq, + .direct_access = io_err_direct_access, }; int __init dm_target_init(void) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 43824d73366d..a15091a0d40c 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1677,6 +1677,36 @@ int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *resu return r; } +int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e) +{ + int r = 0; + + down_write(&pmd->root_lock); + for (; b != e; b++) { + r = dm_sm_inc_block(pmd->data_sm, b); + if (r) + break; + } + up_write(&pmd->root_lock); + + return r; +} + +int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e) +{ + int r = 0; + + down_write(&pmd->root_lock); + for (; b != e; b++) { + r = dm_sm_dec_block(pmd->data_sm, b); + if (r) + break; + } + up_write(&pmd->root_lock); + + return r; +} + bool dm_thin_changed_this_transaction(struct dm_thin_device *td) { int r; diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index a938babe4258..35e954ea20a9 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -197,6 +197,9 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result); +int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e); +int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e); + /* * Returns -ENOSPC if the new size is too small and already allocated * blocks would be lost. 
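
/*
 * A minimal stand-alone sketch, not part of this series. dm-thin.c below
 * uses the two range helpers just declared to close a race: data blocks
 * are pinned (incremented) before the passdown discard is issued and are
 * released (decremented) only in the second worker pass, after the
 * discard has completed, so the allocator can never hand out a block
 * that still has a discard in flight. block_ref and the function names
 * are illustrative; the loop shape mirrors dm_pool_inc/dec_data_range().
 */
#include <assert.h>
#include <stdint.h>

#define NR_BLOCKS 16
static unsigned block_ref[NR_BLOCKS];	/* models the pool's data space map */

static void inc_data_range(uint64_t b, uint64_t e)
{
	for (; b != e; b++)
		block_ref[b]++;
}

static void dec_data_range(uint64_t b, uint64_t e)
{
	for (; b != e; b++)
		block_ref[b]--;
}

int main(void)
{
	int i;

	inc_data_range(4, 8);	/* pt1: pin blocks before issuing passdown */
	/* ... asynchronous discard in flight; refs keep the blocks busy ... */
	dec_data_range(4, 8);	/* pt2: discard completed, free for reuse */

	for (i = 0; i < NR_BLOCKS; i++)
		assert(block_ref[i] == 0);
	return 0;
}
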
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index fc803d50f9f0..197ea2003400 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -253,6 +253,7 @@ struct pool { struct bio_list deferred_flush_bios; struct list_head prepared_mappings; struct list_head prepared_discards; + struct list_head prepared_discards_pt2; struct list_head active_thins; struct dm_deferred_set *shared_read_ds; @@ -269,6 +270,7 @@ struct pool { process_mapping_fn process_prepared_mapping; process_mapping_fn process_prepared_discard; + process_mapping_fn process_prepared_discard_pt2; struct dm_bio_prison_cell **cell_sort_array; }; @@ -360,7 +362,7 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da sector_t len = block_to_sectors(tc->pool, data_e - data_b); return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, - GFP_NOWAIT, REQ_WRITE | REQ_DISCARD, &op->bio); + GFP_NOWAIT, 0, &op->bio); } static void end_discard(struct discard_op *op, int r) @@ -371,7 +373,8 @@ static void end_discard(struct discard_op *op, int r) * need to wait for the chain to complete. */ bio_chain(op->bio, op->parent_bio); - submit_bio(REQ_WRITE | REQ_DISCARD, op->bio); + bio_set_op_attrs(op->bio, REQ_OP_DISCARD, 0); + submit_bio(op->bio); } blk_finish_plug(&op->plug); @@ -696,7 +699,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio) static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) { - return (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && + return (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) && dm_thin_changed_this_transaction(tc->td); } @@ -704,7 +707,7 @@ static void inc_all_io_entry(struct pool *pool, struct bio *bio) { struct dm_thin_endio_hook *h; - if (bio->bi_rw & REQ_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD) return; h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); @@ -867,7 +870,8 @@ static void __inc_remap_and_issue_cell(void *context, struct bio *bio; while ((bio = bio_list_pop(&cell->bios))) { - if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) + if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA) || + bio_op(bio) == REQ_OP_DISCARD) bio_list_add(&info->defer_bios, bio); else { inc_all_io_entry(info->tc->pool, bio); @@ -999,7 +1003,8 @@ static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m) /*----------------------------------------------------------------*/ -static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m) +static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m, + struct bio *discard_parent) { /* * We've already unmapped this range of blocks, but before we @@ -1012,7 +1017,7 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin; struct discard_op op; - begin_discard(&op, tc, m->bio); + begin_discard(&op, tc, discard_parent); while (b != end) { /* find start of unmapped run */ for (; b < end; b++) { @@ -1047,28 +1052,101 @@ out: end_discard(&op, r); } -static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) +static void queue_passdown_pt2(struct dm_thin_new_mapping *m) +{ + unsigned long flags; + struct pool *pool = m->tc->pool; + + spin_lock_irqsave(&pool->lock, flags); + list_add_tail(&m->list, &pool->prepared_discards_pt2); + spin_unlock_irqrestore(&pool->lock, flags); + wake_worker(pool); +} + +static void passdown_endio(struct bio *bio) +{ + /* + * It doesn't matter if the passdown discard failed, we still want + * to unmap (we 
ignore err). + */ + queue_passdown_pt2(bio->bi_private); +} + +static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) { int r; struct thin_c *tc = m->tc; struct pool *pool = tc->pool; + struct bio *discard_parent; + dm_block_t data_end = m->data_block + (m->virt_end - m->virt_begin); + /* + * Only this thread allocates blocks, so we can be sure that the + * newly unmapped blocks will not be allocated before the end of + * the function. + */ r = dm_thin_remove_range(tc->td, m->virt_begin, m->virt_end); if (r) { metadata_operation_failed(pool, "dm_thin_remove_range", r); bio_io_error(m->bio); + cell_defer_no_holder(tc, m->cell); + mempool_free(m, pool->mapping_pool); + return; + } - } else if (m->maybe_shared) { - passdown_double_checking_shared_status(m); + discard_parent = bio_alloc(GFP_NOIO, 1); + if (!discard_parent) { + DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.", + dm_device_name(tc->pool->pool_md)); + queue_passdown_pt2(m); } else { - struct discard_op op; - begin_discard(&op, tc, m->bio); - r = issue_discard(&op, m->data_block, - m->data_block + (m->virt_end - m->virt_begin)); - end_discard(&op, r); + discard_parent->bi_end_io = passdown_endio; + discard_parent->bi_private = m; + + if (m->maybe_shared) + passdown_double_checking_shared_status(m, discard_parent); + else { + struct discard_op op; + + begin_discard(&op, tc, discard_parent); + r = issue_discard(&op, m->data_block, data_end); + end_discard(&op, r); + } } + /* + * Increment the unmapped blocks. This prevents a race between the + * passdown io and reallocation of freed blocks. + */ + r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end); + if (r) { + metadata_operation_failed(pool, "dm_pool_inc_data_range", r); + bio_io_error(m->bio); + cell_defer_no_holder(tc, m->cell); + mempool_free(m, pool->mapping_pool); + return; + } +} + +static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m) +{ + int r; + struct thin_c *tc = m->tc; + struct pool *pool = tc->pool; + + /* + * The passdown has completed, so now we can decrement all those + * unmapped blocks. 
+ */ + r = dm_pool_dec_data_range(pool->pmd, m->data_block, + m->data_block + (m->virt_end - m->virt_begin)); + if (r) { + metadata_operation_failed(pool, "dm_pool_dec_data_range", r); + bio_io_error(m->bio); + } else + bio_endio(m->bio); + cell_defer_no_holder(tc, m->cell); mempool_free(m, pool->mapping_pool); } @@ -1639,7 +1717,8 @@ static void __remap_and_issue_shared_cell(void *context, while ((bio = bio_list_pop(&cell->bios))) { if ((bio_data_dir(bio) == WRITE) || - (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA))) + (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA) || + bio_op(bio) == REQ_OP_DISCARD)) bio_list_add(&info->defer_bios, bio); else { struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));; @@ -2028,7 +2107,7 @@ static void process_thin_deferred_bios(struct thin_c *tc) break; } - if (bio->bi_rw & REQ_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD) pool->process_discard(tc, bio); else pool->process_bio(tc, bio); @@ -2115,7 +2194,7 @@ static void process_thin_deferred_cells(struct thin_c *tc) return; } - if (cell->holder->bi_rw & REQ_DISCARD) + if (bio_op(cell->holder) == REQ_OP_DISCARD) pool->process_discard_cell(tc, cell); else pool->process_cell(tc, cell); @@ -2212,6 +2291,8 @@ static void do_worker(struct work_struct *ws) throttle_work_update(&pool->throttle); process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard); throttle_work_update(&pool->throttle); + process_prepared(pool, &pool->prepared_discards_pt2, &pool->process_prepared_discard_pt2); + throttle_work_update(&pool->throttle); process_deferred_bios(pool); throttle_work_complete(&pool->throttle); } @@ -2340,7 +2421,8 @@ static void set_discard_callbacks(struct pool *pool) if (passdown_enabled(pt)) { pool->process_discard_cell = process_discard_cell_passdown; - pool->process_prepared_discard = process_prepared_discard_passdown; + pool->process_prepared_discard = process_prepared_discard_passdown_pt1; + pool->process_prepared_discard_pt2 = process_prepared_discard_passdown_pt2; } else { pool->process_discard_cell = process_discard_cell_no_passdown; pool->process_prepared_discard = process_prepared_discard_no_passdown; @@ -2553,7 +2635,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } - if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { + if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA) || + bio_op(bio) == REQ_OP_DISCARD) { thin_defer_bio_with_throttle(tc, bio); return DM_MAPIO_SUBMITTED; } @@ -2826,6 +2909,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, bio_list_init(&pool->deferred_flush_bios); INIT_LIST_HEAD(&pool->prepared_mappings); INIT_LIST_HEAD(&pool->prepared_discards); + INIT_LIST_HEAD(&pool->prepared_discards_pt2); INIT_LIST_HEAD(&pool->active_thins); pool->low_water_triggered = false; pool->suspended = true; diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 459a9f8905ed..0f0eb8a3d922 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -453,9 +453,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, */ offset = block << v->data_dev_block_bits; - - res = offset; - div64_u64(res, v->fec->rounds << v->data_dev_block_bits); + res = div64_u64(offset, v->fec->rounds << v->data_dev_block_bits); /* * The base RS block we can feed to the interleaver to find out all diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index 766bc93006e6..618b8752dcf1 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -35,16 +35,19 @@ 
static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) */ static int zero_map(struct dm_target *ti, struct bio *bio) { - switch(bio_rw(bio)) { - case READ: + switch (bio_op(bio)) { + case REQ_OP_READ: + if (bio->bi_rw & REQ_RAHEAD) { + /* readahead of null bytes only wastes buffer cache */ + return -EIO; + } zero_fill_bio(bio); break; - case READA: - /* readahead of null bytes only wastes buffer cache */ - return -EIO; - case WRITE: + case REQ_OP_WRITE: /* writes get silently dropped */ break; + default: + return -EIO; } bio_endio(bio); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1b2f96205361..ceb69fc0b10b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -5,13 +5,13 @@ * This file is released under the GPL. */ -#include "dm.h" +#include "dm-core.h" +#include "dm-rq.h" #include "dm-uevent.h" #include #include #include -#include #include #include #include @@ -20,14 +20,8 @@ #include #include #include -#include -#include -#include /* for rq_end_sector() */ -#include #include -#include - #define DM_MSG_PREFIX "core" #ifdef CONFIG_PRINTK @@ -63,7 +57,6 @@ static DECLARE_WORK(deferred_remove_work, do_deferred_remove); static struct workqueue_struct *deferred_remove_workqueue; /* - * For bio-based dm. * One of these is allocated per bio. */ struct dm_io { @@ -76,36 +69,6 @@ struct dm_io { struct dm_stats_aux stats_aux; }; -/* - * For request-based dm. - * One of these is allocated per request. - */ -struct dm_rq_target_io { - struct mapped_device *md; - struct dm_target *ti; - struct request *orig, *clone; - struct kthread_work work; - int error; - union map_info info; - struct dm_stats_aux stats_aux; - unsigned long duration_jiffies; - unsigned n_sectors; -}; - -/* - * For request-based dm - the bio clones we allocate are embedded in these - * structs. - * - * We allocate these with bio_alloc_bioset, using the front_pad parameter when - * the bioset is created - this means the bio has to come at the end of the - * struct. - */ -struct dm_rq_clone_bio_info { - struct bio *orig; - struct dm_rq_target_io *tio; - struct bio clone; -}; - #define MINOR_ALLOCED ((void *)-1) /* @@ -120,130 +83,9 @@ struct dm_rq_clone_bio_info { #define DMF_DEFERRED_REMOVE 6 #define DMF_SUSPENDED_INTERNALLY 7 -/* - * Work processed by per-device workqueue. - */ -struct mapped_device { - struct srcu_struct io_barrier; - struct mutex suspend_lock; - - /* - * The current mapping (struct dm_table *). - * Use dm_get_live_table{_fast} or take suspend_lock for - * dereference. - */ - void __rcu *map; - - struct list_head table_devices; - struct mutex table_devices_lock; - - unsigned long flags; - - struct request_queue *queue; - int numa_node_id; - - unsigned type; - /* Protect queue and type against concurrent access. */ - struct mutex type_lock; - - atomic_t holders; - atomic_t open_count; - - struct dm_target *immutable_target; - struct target_type *immutable_target_type; - - struct gendisk *disk; - char name[16]; - - void *interface_ptr; - - /* - * A list of ios that arrived while we were suspended. - */ - atomic_t pending[2]; - wait_queue_head_t wait; - struct work_struct work; - spinlock_t deferred_lock; - struct bio_list deferred; - - /* - * Event handling. 
- */ - wait_queue_head_t eventq; - atomic_t event_nr; - atomic_t uevent_seq; - struct list_head uevent_list; - spinlock_t uevent_lock; /* Protect access to uevent_list */ - - /* the number of internal suspends */ - unsigned internal_suspend_count; - - /* - * Processing queue (flush) - */ - struct workqueue_struct *wq; - - /* - * io objects are allocated from here. - */ - mempool_t *io_pool; - mempool_t *rq_pool; - - struct bio_set *bs; - - /* - * freeze/thaw support require holding onto a super block - */ - struct super_block *frozen_sb; - - /* forced geometry settings */ - struct hd_geometry geometry; - - struct block_device *bdev; - - /* kobject and completion */ - struct dm_kobject_holder kobj_holder; - - /* zero-length flush that will be cloned and submitted to targets */ - struct bio flush_bio; - - struct dm_stats stats; - - struct kthread_worker kworker; - struct task_struct *kworker_task; - - /* for request-based merge heuristic in dm_request_fn() */ - unsigned seq_rq_merge_deadline_usecs; - int last_rq_rw; - sector_t last_rq_pos; - ktime_t last_rq_start_time; - - /* for blk-mq request-based DM support */ - struct blk_mq_tag_set *tag_set; - bool use_blk_mq:1; - bool init_tio_pdu:1; -}; - -#ifdef CONFIG_DM_MQ_DEFAULT -static bool use_blk_mq = true; -#else -static bool use_blk_mq = false; -#endif - -#define DM_MQ_NR_HW_QUEUES 1 -#define DM_MQ_QUEUE_DEPTH 2048 #define DM_NUMA_NODE NUMA_NO_NODE - -static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES; -static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH; static int dm_numa_node = DM_NUMA_NODE; -bool dm_use_blk_mq(struct mapped_device *md) -{ - return md->use_blk_mq; -} -EXPORT_SYMBOL_GPL(dm_use_blk_mq); - /* * For mempools pre-allocation at the table loading time. */ @@ -259,9 +101,6 @@ struct table_device { struct dm_dev dm_dev; }; -#define RESERVED_BIO_BASED_IOS 16 -#define RESERVED_REQUEST_BASED_IOS 256 -#define RESERVED_MAX_IOS 1024 static struct kmem_cache *_io_cache; static struct kmem_cache *_rq_tio_cache; static struct kmem_cache *_rq_cache; @@ -269,13 +108,9 @@ static struct kmem_cache *_rq_cache; /* * Bio-based DM's mempools' reserved IOs set by the user. */ +#define RESERVED_BIO_BASED_IOS 16 static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS; -/* - * Request-based DM's mempools' reserved IOs set by the user. 
- */ -static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS; - static int __dm_get_module_param_int(int *module_param, int min, int max) { int param = ACCESS_ONCE(*module_param); @@ -297,8 +132,8 @@ static int __dm_get_module_param_int(int *module_param, int min, int max) return param; } -static unsigned __dm_get_module_param(unsigned *module_param, - unsigned def, unsigned max) +unsigned __dm_get_module_param(unsigned *module_param, + unsigned def, unsigned max) { unsigned param = ACCESS_ONCE(*module_param); unsigned modified_param = 0; @@ -319,28 +154,10 @@ static unsigned __dm_get_module_param(unsigned *module_param, unsigned dm_get_reserved_bio_based_ios(void) { return __dm_get_module_param(&reserved_bio_based_ios, - RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS); + RESERVED_BIO_BASED_IOS, DM_RESERVED_MAX_IOS); } EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios); -unsigned dm_get_reserved_rq_based_ios(void) -{ - return __dm_get_module_param(&reserved_rq_based_ios, - RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS); -} -EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios); - -static unsigned dm_get_blk_mq_nr_hw_queues(void) -{ - return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32); -} - -static unsigned dm_get_blk_mq_queue_depth(void) -{ - return __dm_get_module_param(&dm_mq_queue_depth, - DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH); -} - static unsigned dm_get_numa_node(void) { return __dm_get_module_param_int(&dm_numa_node, @@ -679,29 +496,7 @@ static void free_tio(struct dm_target_io *tio) bio_put(&tio->clone); } -static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md, - gfp_t gfp_mask) -{ - return mempool_alloc(md->io_pool, gfp_mask); -} - -static void free_old_rq_tio(struct dm_rq_target_io *tio) -{ - mempool_free(tio, tio->md->io_pool); -} - -static struct request *alloc_old_clone_request(struct mapped_device *md, - gfp_t gfp_mask) -{ - return mempool_alloc(md->rq_pool, gfp_mask); -} - -static void free_old_clone_request(struct mapped_device *md, struct request *rq) -{ - mempool_free(rq, md->rq_pool); -} - -static int md_in_flight(struct mapped_device *md) +int md_in_flight(struct mapped_device *md) { return atomic_read(&md->pending[READ]) + atomic_read(&md->pending[WRITE]); @@ -723,8 +518,9 @@ static void start_io_acct(struct dm_io *io) atomic_inc_return(&md->pending[rw])); if (unlikely(dm_stats_used(&md->stats))) - dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector, - bio_sectors(bio), false, 0, &io->stats_aux); + dm_stats_account_io(&md->stats, bio_data_dir(bio), + bio->bi_iter.bi_sector, bio_sectors(bio), + false, 0, &io->stats_aux); } static void end_io_acct(struct dm_io *io) @@ -738,8 +534,9 @@ static void end_io_acct(struct dm_io *io) generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time); if (unlikely(dm_stats_used(&md->stats))) - dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector, - bio_sectors(bio), true, duration, &io->stats_aux); + dm_stats_account_io(&md->stats, bio_data_dir(bio), + bio->bi_iter.bi_sector, bio_sectors(bio), + true, duration, &io->stats_aux); /* * After this is decremented the bio must not be touched if it is @@ -1001,12 +798,12 @@ static void dec_pending(struct dm_io *io, int error) if (io_error == DM_ENDIO_REQUEUE) return; - if ((bio->bi_rw & REQ_FLUSH) && bio->bi_iter.bi_size) { + if ((bio->bi_rw & REQ_PREFLUSH) && bio->bi_iter.bi_size) { /* * Preflush done for flush with data, reissue - * without REQ_FLUSH. + * without REQ_PREFLUSH. 
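
/*
 * A minimal stand-alone sketch, not part of this series. A flush bio that
 * also carries data makes two trips through dec_pending(): the empty
 * preflush completes first, then the flag is cleared and the same bio is
 * requeued so only its data portion is mapped. struct toy_bio and the
 * helper below are illustrative, not kernel API.
 */
#include <stdbool.h>

struct toy_bio {
	bool preflush;		/* models bio->bi_rw & REQ_PREFLUSH */
	unsigned size;		/* models bio->bi_iter.bi_size */
};

/* returns true when the bio must go around once more, as queue_io() does */
static bool toy_dec_pending(struct toy_bio *bio)
{
	if (bio->preflush && bio->size) {
		bio->preflush = false;	/* reissue without the preflush flag */
		return true;
	}
	return false;			/* done: normal IO or empty flush */
}

int main(void)
{
	struct toy_bio bio = { .preflush = true, .size = 4096 };
	int rounds = 0;

	while (toy_dec_pending(&bio))
		rounds++;
	return rounds == 1 ? 0 : 1;	/* exactly one extra data-only pass */
}
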
*/ - bio->bi_rw &= ~REQ_FLUSH; + bio->bi_rw &= ~REQ_PREFLUSH; queue_io(md, bio); } else { /* done with normal IO or empty flush */ @@ -1017,7 +814,7 @@ static void dec_pending(struct dm_io *io, int error) } } -static void disable_write_same(struct mapped_device *md) +void disable_write_same(struct mapped_device *md) { struct queue_limits *limits = dm_get_queue_limits(md); @@ -1051,7 +848,7 @@ static void clone_endio(struct bio *bio) } } - if (unlikely(r == -EREMOTEIO && (bio->bi_rw & REQ_WRITE_SAME) && + if (unlikely(r == -EREMOTEIO && (bio_op(bio) == REQ_OP_WRITE_SAME) && !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)) disable_write_same(md); @@ -1059,371 +856,6 @@ static void clone_endio(struct bio *bio) dec_pending(io, error); } -/* - * Partial completion handling for request-based dm - */ -static void end_clone_bio(struct bio *clone) -{ - struct dm_rq_clone_bio_info *info = - container_of(clone, struct dm_rq_clone_bio_info, clone); - struct dm_rq_target_io *tio = info->tio; - struct bio *bio = info->orig; - unsigned int nr_bytes = info->orig->bi_iter.bi_size; - int error = clone->bi_error; - - bio_put(clone); - - if (tio->error) - /* - * An error has already been detected on the request. - * Once error occurred, just let clone->end_io() handle - * the remainder. - */ - return; - else if (error) { - /* - * Don't notice the error to the upper layer yet. - * The error handling decision is made by the target driver, - * when the request is completed. - */ - tio->error = error; - return; - } - - /* - * I/O for the bio successfully completed. - * Notice the data completion to the upper layer. - */ - - /* - * bios are processed from the head of the list. - * So the completing bio should always be rq->bio. - * If it's not, something wrong is happening. - */ - if (tio->orig->bio != bio) - DMERR("bio completion is going in the middle of the request"); - - /* - * Update the original request. - * Do not use blk_end_request() here, because it may complete - * the original request before the clone, and break the ordering. - */ - blk_update_request(tio->orig, 0, nr_bytes); -} - -static struct dm_rq_target_io *tio_from_request(struct request *rq) -{ - return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); -} - -static void rq_end_stats(struct mapped_device *md, struct request *orig) -{ - if (unlikely(dm_stats_used(&md->stats))) { - struct dm_rq_target_io *tio = tio_from_request(orig); - tio->duration_jiffies = jiffies - tio->duration_jiffies; - dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig), - tio->n_sectors, true, tio->duration_jiffies, - &tio->stats_aux); - } -} - -/* - * Don't touch any member of the md after calling this function because - * the md may be freed in dm_put() at the end of this function. - * Or do dm_get() before calling this function and dm_put() later. - */ -static void rq_completed(struct mapped_device *md, int rw, bool run_queue) -{ - atomic_dec(&md->pending[rw]); - - /* nudge anyone waiting on suspend queue */ - if (!md_in_flight(md)) - wake_up(&md->wait); - - /* - * Run this off this callpath, as drivers could invoke end_io while - * inside their request_fn (and holding the queue lock). Calling - * back into ->request_fn() could deadlock attempting to grab the - * queue lock again. - */ - if (!md->queue->mq_ops && run_queue) - blk_run_queue_async(md->queue); - - /* - * dm_put() must be at the end of this function. 
See the comment above - */ - dm_put(md); -} - -static void free_rq_clone(struct request *clone) -{ - struct dm_rq_target_io *tio = clone->end_io_data; - struct mapped_device *md = tio->md; - - blk_rq_unprep_clone(clone); - - if (md->type == DM_TYPE_MQ_REQUEST_BASED) - /* stacked on blk-mq queue(s) */ - tio->ti->type->release_clone_rq(clone); - else if (!md->queue->mq_ops) - /* request_fn queue stacked on request_fn queue(s) */ - free_old_clone_request(md, clone); - - if (!md->queue->mq_ops) - free_old_rq_tio(tio); -} - -/* - * Complete the clone and the original request. - * Must be called without clone's queue lock held, - * see end_clone_request() for more details. - */ -static void dm_end_request(struct request *clone, int error) -{ - int rw = rq_data_dir(clone); - struct dm_rq_target_io *tio = clone->end_io_data; - struct mapped_device *md = tio->md; - struct request *rq = tio->orig; - - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { - rq->errors = clone->errors; - rq->resid_len = clone->resid_len; - - if (rq->sense) - /* - * We are using the sense buffer of the original - * request. - * So setting the length of the sense data is enough. - */ - rq->sense_len = clone->sense_len; - } - - free_rq_clone(clone); - rq_end_stats(md, rq); - if (!rq->q->mq_ops) - blk_end_request_all(rq, error); - else - blk_mq_end_request(rq, error); - rq_completed(md, rw, true); -} - -static void dm_unprep_request(struct request *rq) -{ - struct dm_rq_target_io *tio = tio_from_request(rq); - struct request *clone = tio->clone; - - if (!rq->q->mq_ops) { - rq->special = NULL; - rq->cmd_flags &= ~REQ_DONTPREP; - } - - if (clone) - free_rq_clone(clone); - else if (!tio->md->queue->mq_ops) - free_old_rq_tio(tio); -} - -/* - * Requeue the original request of a clone. - */ -static void dm_old_requeue_request(struct request *rq) -{ - struct request_queue *q = rq->q; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - blk_requeue_request(q, rq); - blk_run_queue_async(q); - spin_unlock_irqrestore(q->queue_lock, flags); -} - -static void dm_mq_requeue_request(struct request *rq) -{ - struct request_queue *q = rq->q; - unsigned long flags; - - blk_mq_requeue_request(rq); - spin_lock_irqsave(q->queue_lock, flags); - if (!blk_queue_stopped(q)) - blk_mq_kick_requeue_list(q); - spin_unlock_irqrestore(q->queue_lock, flags); -} - -static void dm_requeue_original_request(struct mapped_device *md, - struct request *rq) -{ - int rw = rq_data_dir(rq); - - rq_end_stats(md, rq); - dm_unprep_request(rq); - - if (!rq->q->mq_ops) - dm_old_requeue_request(rq); - else - dm_mq_requeue_request(rq); - - rq_completed(md, rw, false); -} - -static void dm_old_stop_queue(struct request_queue *q) -{ - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - if (blk_queue_stopped(q)) { - spin_unlock_irqrestore(q->queue_lock, flags); - return; - } - - blk_stop_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); -} - -static void dm_stop_queue(struct request_queue *q) -{ - if (!q->mq_ops) - dm_old_stop_queue(q); - else - blk_mq_stop_hw_queues(q); -} - -static void dm_old_start_queue(struct request_queue *q) -{ - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - if (blk_queue_stopped(q)) - blk_start_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); -} - -static void dm_start_queue(struct request_queue *q) -{ - if (!q->mq_ops) - dm_old_start_queue(q); - else { - blk_mq_start_stopped_hw_queues(q, true); - blk_mq_kick_requeue_list(q); - } -} - -static void dm_done(struct request *clone, int error, 
bool mapped) -{ - int r = error; - struct dm_rq_target_io *tio = clone->end_io_data; - dm_request_endio_fn rq_end_io = NULL; - - if (tio->ti) { - rq_end_io = tio->ti->type->rq_end_io; - - if (mapped && rq_end_io) - r = rq_end_io(tio->ti, clone, error, &tio->info); - } - - if (unlikely(r == -EREMOTEIO && (clone->cmd_flags & REQ_WRITE_SAME) && - !clone->q->limits.max_write_same_sectors)) - disable_write_same(tio->md); - - if (r <= 0) - /* The target wants to complete the I/O */ - dm_end_request(clone, r); - else if (r == DM_ENDIO_INCOMPLETE) - /* The target will handle the I/O */ - return; - else if (r == DM_ENDIO_REQUEUE) - /* The target wants to requeue the I/O */ - dm_requeue_original_request(tio->md, tio->orig); - else { - DMWARN("unimplemented target endio return value: %d", r); - BUG(); - } -} - -/* - * Request completion handler for request-based dm - */ -static void dm_softirq_done(struct request *rq) -{ - bool mapped = true; - struct dm_rq_target_io *tio = tio_from_request(rq); - struct request *clone = tio->clone; - int rw; - - if (!clone) { - rq_end_stats(tio->md, rq); - rw = rq_data_dir(rq); - if (!rq->q->mq_ops) { - blk_end_request_all(rq, tio->error); - rq_completed(tio->md, rw, false); - free_old_rq_tio(tio); - } else { - blk_mq_end_request(rq, tio->error); - rq_completed(tio->md, rw, false); - } - return; - } - - if (rq->cmd_flags & REQ_FAILED) - mapped = false; - - dm_done(clone, tio->error, mapped); -} - -/* - * Complete the clone and the original request with the error status - * through softirq context. - */ -static void dm_complete_request(struct request *rq, int error) -{ - struct dm_rq_target_io *tio = tio_from_request(rq); - - tio->error = error; - if (!rq->q->mq_ops) - blk_complete_request(rq); - else - blk_mq_complete_request(rq, error); -} - -/* - * Complete the not-mapped clone and the original request with the error status - * through softirq context. - * Target's rq_end_io() function isn't called. - * This may be used when the target's map_rq() or clone_and_map_rq() functions fail. - */ -static void dm_kill_unmapped_request(struct request *rq, int error) -{ - rq->cmd_flags |= REQ_FAILED; - dm_complete_request(rq, error); -} - -/* - * Called with the clone's queue lock held (in the case of .request_fn) - */ -static void end_clone_request(struct request *clone, int error) -{ - struct dm_rq_target_io *tio = clone->end_io_data; - - if (!clone->q->mq_ops) { - /* - * For just cleaning up the information of the queue in which - * the clone was dispatched. - * The clone is *NOT* freed actually here because it is alloced - * from dm own mempool (REQ_ALLOCED isn't set). - */ - __blk_put_request(clone->q, clone); - } - - /* - * Actual request completion is done in a softirq context which doesn't - * hold the clone's queue lock. Otherwise, deadlock could occur because: - * - another request may be submitted by the upper level driver - * of the stacking during the completion - * - the submission which requires queue lock may be done - * against this clone's queue - */ - dm_complete_request(tio->orig, error); -} - /* * Return maximum size of I/O possible at the supplied sector up to the current * target boundary. @@ -1473,15 +905,42 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) } EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); -/* - * A target may call dm_accept_partial_bio only from the map routine. It is - * allowed for all bio types except REQ_FLUSH. 
- * - * dm_accept_partial_bio informs the dm that the target only wants to process - * additional n_sectors sectors of the bio and the rest of the data should be - * sent in a next bio. - * - * A diagram that explains the arithmetics: +static long dm_blk_direct_access(struct block_device *bdev, sector_t sector, + void __pmem **kaddr, pfn_t *pfn, long size) +{ + struct mapped_device *md = bdev->bd_disk->private_data; + struct dm_table *map; + struct dm_target *ti; + int srcu_idx; + long len, ret = -EIO; + + map = dm_get_live_table(md, &srcu_idx); + if (!map) + goto out; + + ti = dm_table_find_target(map, sector); + if (!dm_target_is_valid(ti)) + goto out; + + len = max_io_len(sector, ti) << SECTOR_SHIFT; + size = min(len, size); + + if (ti->type->direct_access) + ret = ti->type->direct_access(ti, sector, kaddr, pfn, size); +out: + dm_put_live_table(md, srcu_idx); + return min(ret, size); +} + +/* + * A target may call dm_accept_partial_bio only from the map routine. It is + * allowed for all bio types except REQ_PREFLUSH. + * + * dm_accept_partial_bio informs the dm that the target only wants to process + * additional n_sectors sectors of the bio and the rest of the data should be + * sent in a next bio. + * + * A diagram that explains the arithmetics: * +--------------------+---------------+-------+ * | 1 | 2 | 3 | * +--------------------+---------------+-------+ @@ -1505,7 +964,7 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) { struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT; - BUG_ON(bio->bi_rw & REQ_FLUSH); + BUG_ON(bio->bi_rw & REQ_PREFLUSH); BUG_ON(bi_size > *tio->len_ptr); BUG_ON(n_sectors > bi_size); *tio->len_ptr -= bi_size - n_sectors; @@ -1672,521 +1131,175 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, typedef unsigned (*get_num_bios_fn)(struct dm_target *ti); -static unsigned get_num_discard_bios(struct dm_target *ti) -{ - return ti->num_discard_bios; -} - -static unsigned get_num_write_same_bios(struct dm_target *ti) -{ - return ti->num_write_same_bios; -} - -typedef bool (*is_split_required_fn)(struct dm_target *ti); - -static bool is_split_required_for_discard(struct dm_target *ti) -{ - return ti->split_discard_bios; -} - -static int __send_changing_extent_only(struct clone_info *ci, - get_num_bios_fn get_num_bios, - is_split_required_fn is_split_required) -{ - struct dm_target *ti; - unsigned len; - unsigned num_bios; - - do { - ti = dm_table_find_target(ci->map, ci->sector); - if (!dm_target_is_valid(ti)) - return -EIO; - - /* - * Even though the device advertised support for this type of - * request, that does not mean every target supports it, and - * reconfiguration might also have changed that since the - * check was performed. - */ - num_bios = get_num_bios ? 
get_num_bios(ti) : 0; - if (!num_bios) - return -EOPNOTSUPP; - - if (is_split_required && !is_split_required(ti)) - len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); - else - len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti)); - - __send_duplicate_bios(ci, ti, num_bios, &len); - - ci->sector += len; - } while (ci->sector_count -= len); - - return 0; -} - -static int __send_discard(struct clone_info *ci) -{ - return __send_changing_extent_only(ci, get_num_discard_bios, - is_split_required_for_discard); -} - -static int __send_write_same(struct clone_info *ci) -{ - return __send_changing_extent_only(ci, get_num_write_same_bios, NULL); -} - -/* - * Select the correct strategy for processing a non-flush bio. - */ -static int __split_and_process_non_flush(struct clone_info *ci) -{ - struct bio *bio = ci->bio; - struct dm_target *ti; - unsigned len; - int r; - - if (unlikely(bio->bi_rw & REQ_DISCARD)) - return __send_discard(ci); - else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) - return __send_write_same(ci); - - ti = dm_table_find_target(ci->map, ci->sector); - if (!dm_target_is_valid(ti)) - return -EIO; - - len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count); - - r = __clone_and_map_data_bio(ci, ti, ci->sector, &len); - if (r < 0) - return r; - - ci->sector += len; - ci->sector_count -= len; - - return 0; -} - -/* - * Entry point to split a bio into clones and submit them to the targets. - */ -static void __split_and_process_bio(struct mapped_device *md, - struct dm_table *map, struct bio *bio) -{ - struct clone_info ci; - int error = 0; - - if (unlikely(!map)) { - bio_io_error(bio); - return; - } - - ci.map = map; - ci.md = md; - ci.io = alloc_io(md); - ci.io->error = 0; - atomic_set(&ci.io->io_count, 1); - ci.io->bio = bio; - ci.io->md = md; - spin_lock_init(&ci.io->endio_lock); - ci.sector = bio->bi_iter.bi_sector; - - start_io_acct(ci.io); - - if (bio->bi_rw & REQ_FLUSH) { - ci.bio = &ci.md->flush_bio; - ci.sector_count = 0; - error = __send_empty_flush(&ci); - /* dec_pending submits any data associated with flush */ - } else { - ci.bio = bio; - ci.sector_count = bio_sectors(bio); - while (ci.sector_count && !error) - error = __split_and_process_non_flush(&ci); - } - - /* drop the extra reference count */ - dec_pending(ci.io, error); -} -/*----------------------------------------------------------------- - * CRUD END - *---------------------------------------------------------------*/ - -/* - * The request function that just remaps the bio built up by - * dm_merge_bvec. 
- */ -static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) -{ - int rw = bio_data_dir(bio); - struct mapped_device *md = q->queuedata; - int srcu_idx; - struct dm_table *map; - - map = dm_get_live_table(md, &srcu_idx); - - generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0); - - /* if we're suspended, we have to queue this io for later */ - if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { - dm_put_live_table(md, srcu_idx); - - if (bio_rw(bio) != READA) - queue_io(md, bio); - else - bio_io_error(bio); - return BLK_QC_T_NONE; - } - - __split_and_process_bio(md, map, bio); - dm_put_live_table(md, srcu_idx); - return BLK_QC_T_NONE; -} - -int dm_request_based(struct mapped_device *md) -{ - return blk_queue_stackable(md->queue); -} - -static void dm_dispatch_clone_request(struct request *clone, struct request *rq) -{ - int r; - - if (blk_queue_io_stat(clone->q)) - clone->cmd_flags |= REQ_IO_STAT; - - clone->start_time = jiffies; - r = blk_insert_cloned_request(clone->q, clone); - if (r) - /* must complete clone in terms of original request */ - dm_complete_request(rq, r); -} - -static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, - void *data) -{ - struct dm_rq_target_io *tio = data; - struct dm_rq_clone_bio_info *info = - container_of(bio, struct dm_rq_clone_bio_info, clone); - - info->orig = bio_orig; - info->tio = tio; - bio->bi_end_io = end_clone_bio; - - return 0; -} - -static int setup_clone(struct request *clone, struct request *rq, - struct dm_rq_target_io *tio, gfp_t gfp_mask) -{ - int r; - - r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask, - dm_rq_bio_constructor, tio); - if (r) - return r; - - clone->cmd = rq->cmd; - clone->cmd_len = rq->cmd_len; - clone->sense = rq->sense; - clone->end_io = end_clone_request; - clone->end_io_data = tio; - - tio->clone = clone; - - return 0; -} - -static struct request *clone_old_rq(struct request *rq, struct mapped_device *md, - struct dm_rq_target_io *tio, gfp_t gfp_mask) -{ - /* - * Create clone for use with .request_fn request_queue - */ - struct request *clone; - - clone = alloc_old_clone_request(md, gfp_mask); - if (!clone) - return NULL; - - blk_rq_init(NULL, clone); - if (setup_clone(clone, rq, tio, gfp_mask)) { - /* -ENOMEM */ - free_old_clone_request(md, clone); - return NULL; - } - - return clone; -} - -static void map_tio_request(struct kthread_work *work); - -static void init_tio(struct dm_rq_target_io *tio, struct request *rq, - struct mapped_device *md) -{ - tio->md = md; - tio->ti = NULL; - tio->clone = NULL; - tio->orig = rq; - tio->error = 0; - /* - * Avoid initializing info for blk-mq; it passes - * target-specific data through info.ptr - * (see: dm_mq_init_request) - */ - if (!md->init_tio_pdu) - memset(&tio->info, 0, sizeof(tio->info)); - if (md->kworker_task) - init_kthread_work(&tio->work, map_tio_request); -} - -static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq, - struct mapped_device *md, - gfp_t gfp_mask) -{ - struct dm_rq_target_io *tio; - int srcu_idx; - struct dm_table *table; - - tio = alloc_old_rq_tio(md, gfp_mask); - if (!tio) - return NULL; - - init_tio(tio, rq, md); - - table = dm_get_live_table(md, &srcu_idx); - /* - * Must clone a request if this .request_fn DM device - * is stacked on .request_fn device(s). 
- */ - if (!dm_table_mq_request_based(table)) { - if (!clone_old_rq(rq, md, tio, gfp_mask)) { - dm_put_live_table(md, srcu_idx); - free_old_rq_tio(tio); - return NULL; - } - } - dm_put_live_table(md, srcu_idx); - - return tio; -} - -/* - * Called with the queue lock held. - */ -static int dm_old_prep_fn(struct request_queue *q, struct request *rq) -{ - struct mapped_device *md = q->queuedata; - struct dm_rq_target_io *tio; - - if (unlikely(rq->special)) { - DMWARN("Already has something in rq->special."); - return BLKPREP_KILL; - } - - tio = dm_old_prep_tio(rq, md, GFP_ATOMIC); - if (!tio) - return BLKPREP_DEFER; - - rq->special = tio; - rq->cmd_flags |= REQ_DONTPREP; - - return BLKPREP_OK; -} - -/* - * Returns: - * 0 : the request has been processed - * DM_MAPIO_REQUEUE : the original request needs to be requeued - * < 0 : the request was completed due to failure - */ -static int map_request(struct dm_rq_target_io *tio, struct request *rq, - struct mapped_device *md) -{ - int r; - struct dm_target *ti = tio->ti; - struct request *clone = NULL; - - if (tio->clone) { - clone = tio->clone; - r = ti->type->map_rq(ti, clone, &tio->info); - } else { - r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone); - if (r < 0) { - /* The target wants to complete the I/O */ - dm_kill_unmapped_request(rq, r); - return r; - } - if (r != DM_MAPIO_REMAPPED) - return r; - if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { - /* -ENOMEM */ - ti->type->release_clone_rq(clone); - return DM_MAPIO_REQUEUE; - } - } - - switch (r) { - case DM_MAPIO_SUBMITTED: - /* The target has taken the I/O to submit by itself later */ - break; - case DM_MAPIO_REMAPPED: - /* The target has remapped the I/O so dispatch it */ - trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), - blk_rq_pos(rq)); - dm_dispatch_clone_request(clone, rq); - break; - case DM_MAPIO_REQUEUE: - /* The target wants to requeue the I/O */ - dm_requeue_original_request(md, tio->orig); - break; - default: - if (r > 0) { - DMWARN("unimplemented target map return value: %d", r); - BUG(); - } - - /* The target wants to complete the I/O */ - dm_kill_unmapped_request(rq, r); - return r; - } - - return 0; +static unsigned get_num_discard_bios(struct dm_target *ti) +{ + return ti->num_discard_bios; } -static void map_tio_request(struct kthread_work *work) +static unsigned get_num_write_same_bios(struct dm_target *ti) { - struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work); - struct request *rq = tio->orig; - struct mapped_device *md = tio->md; + return ti->num_write_same_bios; +} + +typedef bool (*is_split_required_fn)(struct dm_target *ti); - if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) - dm_requeue_original_request(md, rq); +static bool is_split_required_for_discard(struct dm_target *ti) +{ + return ti->split_discard_bios; } -static void dm_start_request(struct mapped_device *md, struct request *orig) +static int __send_changing_extent_only(struct clone_info *ci, + get_num_bios_fn get_num_bios, + is_split_required_fn is_split_required) { - if (!orig->q->mq_ops) - blk_start_request(orig); - else - blk_mq_start_request(orig); - atomic_inc(&md->pending[rq_data_dir(orig)]); + struct dm_target *ti; + unsigned len; + unsigned num_bios; - if (md->seq_rq_merge_deadline_usecs) { - md->last_rq_pos = rq_end_sector(orig); - md->last_rq_rw = rq_data_dir(orig); - md->last_rq_start_time = ktime_get(); - } + do { + ti = dm_table_find_target(ci->map, ci->sector); + if (!dm_target_is_valid(ti)) + return -EIO; - if 
(unlikely(dm_stats_used(&md->stats))) { - struct dm_rq_target_io *tio = tio_from_request(orig); - tio->duration_jiffies = jiffies; - tio->n_sectors = blk_rq_sectors(orig); - dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig), - tio->n_sectors, false, 0, &tio->stats_aux); - } + /* + * Even though the device advertised support for this type of + * request, that does not mean every target supports it, and + * reconfiguration might also have changed that since the + * check was performed. + */ + num_bios = get_num_bios ? get_num_bios(ti) : 0; + if (!num_bios) + return -EOPNOTSUPP; - /* - * Hold the md reference here for the in-flight I/O. - * We can't rely on the reference count by device opener, - * because the device may be closed during the request completion - * when all bios are completed. - * See the comment in rq_completed() too. - */ - dm_get(md); + if (is_split_required && !is_split_required(ti)) + len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); + else + len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti)); + + __send_duplicate_bios(ci, ti, num_bios, &len); + + ci->sector += len; + } while (ci->sector_count -= len); + + return 0; } -#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000 +static int __send_discard(struct clone_info *ci) +{ + return __send_changing_extent_only(ci, get_num_discard_bios, + is_split_required_for_discard); +} -ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf) +static int __send_write_same(struct clone_info *ci) { - return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs); + return __send_changing_extent_only(ci, get_num_write_same_bios, NULL); } -ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, - const char *buf, size_t count) +/* + * Select the correct strategy for processing a non-flush bio. + */ +static int __split_and_process_non_flush(struct clone_info *ci) { - unsigned deadline; + struct bio *bio = ci->bio; + struct dm_target *ti; + unsigned len; + int r; + + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) + return __send_discard(ci); + else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) + return __send_write_same(ci); - if (!dm_request_based(md) || md->use_blk_mq) - return count; + ti = dm_table_find_target(ci->map, ci->sector); + if (!dm_target_is_valid(ti)) + return -EIO; - if (kstrtouint(buf, 10, &deadline)) - return -EINVAL; + len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count); - if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS) - deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS; + r = __clone_and_map_data_bio(ci, ti, ci->sector, &len); + if (r < 0) + return r; - md->seq_rq_merge_deadline_usecs = deadline; + ci->sector += len; + ci->sector_count -= len; - return count; + return 0; } -static bool dm_request_peeked_before_merge_deadline(struct mapped_device *md) +/* + * Entry point to split a bio into clones and submit them to the targets. 
+ */ +static void __split_and_process_bio(struct mapped_device *md, + struct dm_table *map, struct bio *bio) { - ktime_t kt_deadline; + struct clone_info ci; + int error = 0; + + if (unlikely(!map)) { + bio_io_error(bio); + return; + } + + ci.map = map; + ci.md = md; + ci.io = alloc_io(md); + ci.io->error = 0; + atomic_set(&ci.io->io_count, 1); + ci.io->bio = bio; + ci.io->md = md; + spin_lock_init(&ci.io->endio_lock); + ci.sector = bio->bi_iter.bi_sector; - if (!md->seq_rq_merge_deadline_usecs) - return false; + start_io_acct(ci.io); - kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC); - kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline); + if (bio->bi_rw & REQ_PREFLUSH) { + ci.bio = &ci.md->flush_bio; + ci.sector_count = 0; + error = __send_empty_flush(&ci); + /* dec_pending submits any data associated with flush */ + } else { + ci.bio = bio; + ci.sector_count = bio_sectors(bio); + while (ci.sector_count && !error) + error = __split_and_process_non_flush(&ci); + } - return !ktime_after(ktime_get(), kt_deadline); + /* drop the extra reference count */ + dec_pending(ci.io, error); } +/*----------------------------------------------------------------- + * CRUD END + *---------------------------------------------------------------*/ /* - * q->request_fn for request-based dm. - * Called with the queue lock held. + * The request function that just remaps the bio built up by + * dm_merge_bvec. */ -static void dm_request_fn(struct request_queue *q) +static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) { + int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; - struct dm_target *ti = md->immutable_target; - struct request *rq; - struct dm_rq_target_io *tio; - sector_t pos = 0; - - if (unlikely(!ti)) { - int srcu_idx; - struct dm_table *map = dm_get_live_table(md, &srcu_idx); - - ti = dm_table_find_target(map, pos); - dm_put_live_table(md, srcu_idx); - } - - /* - * For suspend, check blk_queue_stopped() and increment - * ->pending within a single queue_lock not to increment the - * number of in-flight I/Os after the queue is stopped in - * dm_suspend(). 
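
/*
 * A minimal stand-alone sketch, not part of this series. The bio-based
 * dm_make_request() above triages incoming bios against suspend state:
 * readahead is cheap to fail outright, anything else is deferred and
 * replayed on resume. The names and the enum below are illustrative.
 */
#include <stdbool.h>

enum toy_disposition { TOY_MAPPED, TOY_DEFERRED, TOY_FAILED };

static enum toy_disposition toy_triage(bool suspended, bool readahead)
{
	if (!suspended)
		return TOY_MAPPED;	/* __split_and_process_bio() path */
	if (readahead)
		return TOY_FAILED;	/* REQ_RAHEAD: bio_io_error(), -EIO */
	return TOY_DEFERRED;		/* queue_io(): replayed after resume */
}

int main(void)
{
	return (toy_triage(true, false) == TOY_DEFERRED &&
		toy_triage(true, true) == TOY_FAILED &&
		toy_triage(false, true) == TOY_MAPPED) ? 0 : 1;
}
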
- */ - while (!blk_queue_stopped(q)) { - rq = blk_peek_request(q); - if (!rq) - return; + int srcu_idx; + struct dm_table *map; - /* always use block 0 to find the target for flushes for now */ - pos = 0; - if (!(rq->cmd_flags & REQ_FLUSH)) - pos = blk_rq_pos(rq); + map = dm_get_live_table(md, &srcu_idx); - if ((dm_request_peeked_before_merge_deadline(md) && - md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 && - md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) || - (ti->type->busy && ti->type->busy(ti))) { - blk_delay_queue(q, HZ / 100); - return; - } + generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0); - dm_start_request(md, rq); + /* if we're suspended, we have to queue this io for later */ + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { + dm_put_live_table(md, srcu_idx); - tio = tio_from_request(rq); - /* Establish tio->ti before queuing work (map_tio_request) */ - tio->ti = ti; - queue_kthread_work(&md->kworker, &tio->work); - BUG_ON(!irqs_disabled()); + if (!(bio->bi_rw & REQ_RAHEAD)) + queue_io(md, bio); + else + bio_io_error(bio); + return BLK_QC_T_NONE; } + + __split_and_process_bio(md, map, bio); + dm_put_live_table(md, srcu_idx); + return BLK_QC_T_NONE; } static int dm_any_congested(void *congested_data, int bdi_bits) @@ -2266,7 +1379,7 @@ static const struct block_device_operations dm_blk_dops; static void dm_wq_work(struct work_struct *work); -static void dm_init_md_queue(struct mapped_device *md) +void dm_init_md_queue(struct mapped_device *md) { /* * Request-based dm devices cannot be stacked on top of bio-based dm @@ -2287,7 +1400,7 @@ static void dm_init_md_queue(struct mapped_device *md) md->queue->backing_dev_info.congested_data = md; } -static void dm_init_normal_md_queue(struct mapped_device *md) +void dm_init_normal_md_queue(struct mapped_device *md) { md->use_blk_mq = false; dm_init_md_queue(md); @@ -2327,6 +1440,8 @@ static void cleanup_mapped_device(struct mapped_device *md) bdput(md->bdev); md->bdev = NULL; } + + dm_mq_cleanup_mapped_device(md); } /* @@ -2360,7 +1475,7 @@ static struct mapped_device *alloc_dev(int minor) goto bad_io_barrier; md->numa_node_id = numa_node_id; - md->use_blk_mq = use_blk_mq; + md->use_blk_mq = dm_use_blk_mq_default(); md->init_tio_pdu = false; md->type = DM_TYPE_NONE; mutex_init(&md->suspend_lock); @@ -2412,7 +1527,7 @@ static struct mapped_device *alloc_dev(int minor) bio_init(&md->flush_bio); md->flush_bio.bi_bdev = md->bdev; - md->flush_bio.bi_rw = WRITE_FLUSH; + bio_set_op_attrs(&md->flush_bio, REQ_OP_WRITE, WRITE_FLUSH); dm_stats_init(&md->stats); @@ -2445,10 +1560,6 @@ static void free_dev(struct mapped_device *md) unlock_fs(md); cleanup_mapped_device(md); - if (md->tag_set) { - blk_mq_free_tag_set(md->tag_set); - kfree(md->tag_set); - } free_table_devices(&md->table_devices); dm_stats_cleanup(&md->stats); @@ -2464,7 +1575,7 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) if (md->bs) { /* The md already has necessary mempools. */ - if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) { + if (dm_table_bio_based(t)) { /* * Reload bioset because front_pad may have changed * because a different table was loaded. 
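
/*
 * A minimal stand-alone sketch, not part of this series. The pattern that
 * repeats through this pull, replacing tests like bi_rw & REQ_DISCARD with
 * bio_op(bio) == REQ_OP_DISCARD, works because 4.8 encodes the operation
 * in the high bits of the request word and leaves the low bits to modifier
 * flags (REQ_PREFLUSH, REQ_FUA, REQ_RAHEAD). The shift width and TOY_*
 * names below are illustrative, not the kernel's actual layout.
 */
#include <assert.h>

enum toy_op { TOY_OP_READ, TOY_OP_WRITE, TOY_OP_DISCARD, TOY_OP_WRITE_SAME };

#define TOY_OP_SHIFT		29u		/* op lives in the high bits */
#define TOY_FLAG_FUA		(1u << 0)	/* flags stay in the low bits */
#define TOY_FLAG_PREFLUSH	(1u << 1)

static unsigned toy_set_op_attrs(enum toy_op op, unsigned flags)
{
	return ((unsigned)op << TOY_OP_SHIFT) | flags;	/* models bio_set_op_attrs() */
}

static enum toy_op toy_bio_op(unsigned rw)
{
	return (enum toy_op)(rw >> TOY_OP_SHIFT);	/* models bio_op() */
}

int main(void)
{
	unsigned rw = toy_set_op_attrs(TOY_OP_DISCARD, TOY_FLAG_FUA);

	assert(toy_bio_op(rw) == TOY_OP_DISCARD);
	/* an op is an enumeration, not a bit, so flag masking cannot test it */
	assert(toy_bio_op(toy_set_op_attrs(TOY_OP_WRITE, 0)) != TOY_OP_DISCARD);
	return 0;
}
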
@@ -2654,176 +1765,15 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_get_queue_limits); -static void dm_old_init_rq_based_worker_thread(struct mapped_device *md) -{ - /* Initialize the request-based DM worker thread */ - init_kthread_worker(&md->kworker); - md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, - "kdmwork-%s", dm_device_name(md)); -} - -/* - * Fully initialize a .request_fn request-based queue. - */ -static int dm_old_init_request_queue(struct mapped_device *md) -{ - /* Fully initialize the queue */ - if (!blk_init_allocated_queue(md->queue, dm_request_fn, NULL)) - return -EINVAL; - - /* disable dm_request_fn's merge heuristic by default */ - md->seq_rq_merge_deadline_usecs = 0; - - dm_init_normal_md_queue(md); - blk_queue_softirq_done(md->queue, dm_softirq_done); - blk_queue_prep_rq(md->queue, dm_old_prep_fn); - - dm_old_init_rq_based_worker_thread(md); - - elv_register_queue(md->queue); - - return 0; -} - -static int dm_mq_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) -{ - struct mapped_device *md = data; - struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); - - /* - * Must initialize md member of tio, otherwise it won't - * be available in dm_mq_queue_rq. - */ - tio->md = md; - - if (md->init_tio_pdu) { - /* target-specific per-io data is immediately after the tio */ - tio->info.ptr = tio + 1; - } - - return 0; -} - -static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) -{ - struct request *rq = bd->rq; - struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); - struct mapped_device *md = tio->md; - struct dm_target *ti = md->immutable_target; - - if (unlikely(!ti)) { - int srcu_idx; - struct dm_table *map = dm_get_live_table(md, &srcu_idx); - - ti = dm_table_find_target(map, 0); - dm_put_live_table(md, srcu_idx); - } - - if (ti->type->busy && ti->type->busy(ti)) - return BLK_MQ_RQ_QUEUE_BUSY; - - dm_start_request(md, rq); - - /* Init tio using md established in .init_request */ - init_tio(tio, rq, md); - - /* - * Establish tio->ti before queuing work (map_tio_request) - * or making direct call to map_request(). 
- */ - tio->ti = ti; - - /* Direct call is fine since .queue_rq allows allocations */ - if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) { - /* Undo dm_start_request() before requeuing */ - rq_end_stats(md, rq); - rq_completed(md, rq_data_dir(rq), false); - return BLK_MQ_RQ_QUEUE_BUSY; - } - - return BLK_MQ_RQ_QUEUE_OK; -} - -static struct blk_mq_ops dm_mq_ops = { - .queue_rq = dm_mq_queue_rq, - .map_queue = blk_mq_map_queue, - .complete = dm_softirq_done, - .init_request = dm_mq_init_request, -}; - -static int dm_mq_init_request_queue(struct mapped_device *md, - struct dm_target *immutable_tgt) -{ - struct request_queue *q; - int err; - - if (dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) { - DMERR("request-based dm-mq may only be stacked on blk-mq device(s)"); - return -EINVAL; - } - - md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id); - if (!md->tag_set) - return -ENOMEM; - - md->tag_set->ops = &dm_mq_ops; - md->tag_set->queue_depth = dm_get_blk_mq_queue_depth(); - md->tag_set->numa_node = md->numa_node_id; - md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; - md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues(); - md->tag_set->driver_data = md; - - md->tag_set->cmd_size = sizeof(struct dm_rq_target_io); - if (immutable_tgt && immutable_tgt->per_io_data_size) { - /* any target-specific per-io data is immediately after the tio */ - md->tag_set->cmd_size += immutable_tgt->per_io_data_size; - md->init_tio_pdu = true; - } - - err = blk_mq_alloc_tag_set(md->tag_set); - if (err) - goto out_kfree_tag_set; - - q = blk_mq_init_allocated_queue(md->tag_set, md->queue); - if (IS_ERR(q)) { - err = PTR_ERR(q); - goto out_tag_set; - } - dm_init_md_queue(md); - - /* backfill 'mq' sysfs registration normally done in blk_register_queue */ - blk_mq_register_disk(md->disk); - - return 0; - -out_tag_set: - blk_mq_free_tag_set(md->tag_set); -out_kfree_tag_set: - kfree(md->tag_set); - - return err; -} - -static unsigned filter_md_type(unsigned type, struct mapped_device *md) -{ - if (type == DM_TYPE_BIO_BASED) - return type; - - return !md->use_blk_mq ? 
DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED; -} - /* * Setup the DM device's queue based on md's type */ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) { int r; - unsigned md_type = filter_md_type(dm_get_md_type(md), md); + unsigned type = dm_get_md_type(md); - switch (md_type) { + switch (type) { case DM_TYPE_REQUEST_BASED: r = dm_old_init_request_queue(md); if (r) { @@ -2832,13 +1782,14 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) } break; case DM_TYPE_MQ_REQUEST_BASED: - r = dm_mq_init_request_queue(md, dm_table_get_immutable_target(t)); + r = dm_mq_init_request_queue(md, t); if (r) { DMERR("Cannot initialize queue for request-based dm-mq mapped device"); return r; } break; case DM_TYPE_BIO_BASED: + case DM_TYPE_DAX_BIO_BASED: dm_init_normal_md_queue(md); blk_queue_make_request(md->queue, dm_make_request); /* @@ -2847,6 +1798,9 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) */ bioset_free(md->queue->bio_split); md->queue->bio_split = NULL; + + if (type == DM_TYPE_DAX_BIO_BASED) + queue_flag_set_unlocked(QUEUE_FLAG_DAX, md->queue); break; } @@ -3541,10 +2495,9 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t if (!pools) return NULL; - type = filter_md_type(type, md); - switch (type) { case DM_TYPE_BIO_BASED: + case DM_TYPE_DAX_BIO_BASED: cachep = _io_cache; pool_size = dm_get_reserved_bio_based_ios(); front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); @@ -3601,26 +2554,76 @@ void dm_free_md_mempools(struct dm_md_mempools *pools) kfree(pools); } -static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, - u32 flags) +struct dm_pr { + u64 old_key; + u64 new_key; + u32 flags; + bool fail_early; +}; + +static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn, + void *data) { struct mapped_device *md = bdev->bd_disk->private_data; - const struct pr_ops *ops; - fmode_t mode; - int r; + struct dm_table *table; + struct dm_target *ti; + int ret = -ENOTTY, srcu_idx; - r = dm_grab_bdev_for_ioctl(md, &bdev, &mode); - if (r < 0) - return r; + table = dm_get_live_table(md, &srcu_idx); + if (!table || !dm_table_get_size(table)) + goto out; - ops = bdev->bd_disk->fops->pr_ops; - if (ops && ops->pr_register) - r = ops->pr_register(bdev, old_key, new_key, flags); - else - r = -EOPNOTSUPP; + /* We only support devices that have a single target */ + if (dm_table_get_num_targets(table) != 1) + goto out; + ti = dm_table_get_target(table, 0); - bdput(bdev); - return r; + ret = -EINVAL; + if (!ti->type->iterate_devices) + goto out; + + ret = ti->type->iterate_devices(ti, fn, data); +out: + dm_put_live_table(md, srcu_idx); + return ret; +} + +/* + * For register / unregister we need to manually call out to every path. 
+ */ +static int __dm_pr_register(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct dm_pr *pr = data; + const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; + + if (!ops || !ops->pr_register) + return -EOPNOTSUPP; + return ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags); +} + +static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, + u32 flags) +{ + struct dm_pr pr = { + .old_key = old_key, + .new_key = new_key, + .flags = flags, + .fail_early = true, + }; + int ret; + + ret = dm_call_pr(bdev, __dm_pr_register, &pr); + if (ret && new_key) { + /* unregister all paths if we failed to register any path */ + pr.old_key = new_key; + pr.new_key = 0; + pr.flags = 0; + pr.fail_early = false; + dm_call_pr(bdev, __dm_pr_register, &pr); + } + + return ret; } static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type, @@ -3721,6 +2724,7 @@ static const struct block_device_operations dm_blk_dops = { .open = dm_blk_open, .release = dm_blk_close, .ioctl = dm_blk_ioctl, + .direct_access = dm_blk_direct_access, .getgeo = dm_blk_getgeo, .pr_ops = &dm_pr_ops, .owner = THIS_MODULE @@ -3738,18 +2742,6 @@ MODULE_PARM_DESC(major, "The major number of the device mapper"); module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools"); -module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"); - -module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices"); - -module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices"); - -module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices"); - module_param(dm_numa_node, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations"); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 13a758ec0f88..f0aad08b9654 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -32,14 +33,6 @@ */ #define DM_STATUS_NOFLUSH_FLAG (1 << 0) -/* - * Type of table and mapped_device's mempool - */ -#define DM_TYPE_NONE 0 -#define DM_TYPE_BIO_BASED 1 -#define DM_TYPE_REQUEST_BASED 2 -#define DM_TYPE_MQ_REQUEST_BASED 3 - /* * List of devices that a metadevice uses and should open/close. 
*/ @@ -75,8 +68,9 @@ unsigned dm_table_get_type(struct dm_table *t); struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); struct dm_target *dm_table_get_immutable_target(struct dm_table *t); struct dm_target *dm_table_get_wildcard_target(struct dm_table *t); +bool dm_table_bio_based(struct dm_table *t); bool dm_table_request_based(struct dm_table *t); -bool dm_table_mq_request_based(struct dm_table *t); +bool dm_table_all_blk_mq_devices(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); @@ -161,16 +155,6 @@ void dm_interface_exit(void); /* * sysfs interface */ -struct dm_kobject_holder { - struct kobject kobj; - struct completion completion; -}; - -static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) -{ - return &container_of(kobj, struct dm_kobject_holder, kobj)->completion; -} - int dm_sysfs_init(struct mapped_device *md); void dm_sysfs_exit(struct mapped_device *md); struct kobject *dm_kobject(struct mapped_device *md); @@ -212,8 +196,6 @@ int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, void dm_internal_suspend(struct mapped_device *md); void dm_internal_resume(struct mapped_device *md); -bool dm_use_blk_mq(struct mapped_device *md); - int dm_io_init(void); void dm_io_exit(void); @@ -228,18 +210,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t void dm_free_md_mempools(struct dm_md_mempools *pools); /* - * Helpers that are used by DM core + * Various helpers */ unsigned dm_get_reserved_bio_based_ios(void); -unsigned dm_get_reserved_rq_based_ios(void); - -static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen) -{ - return !maxlen || strlen(result) + 1 >= maxlen; -} - -ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf); -ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, - const char *buf, size_t count); #endif diff --git a/drivers/md/linear.c b/drivers/md/linear.c index b7fe7e9fc777..70ff888d25d0 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -221,7 +221,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) struct bio *split; sector_t start_sector, end_sector, data_offset; - if (unlikely(bio->bi_rw & REQ_FLUSH)) { + if (unlikely(bio->bi_rw & REQ_PREFLUSH)) { md_flush_request(mddev, bio); return; } @@ -252,7 +252,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) split->bi_iter.bi_sector = split->bi_iter.bi_sector - start_sector + data_offset; - if (unlikely((split->bi_rw & REQ_DISCARD) && + if (unlikely((bio_op(split) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { /* Just ignore it */ bio_endio(split); diff --git a/drivers/md/md.c b/drivers/md/md.c index 866825f10b4c..1f123f5a29da 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -394,8 +394,9 @@ static void submit_flushes(struct work_struct *ws) bi->bi_end_io = md_end_flush; bi->bi_private = rdev; bi->bi_bdev = rdev->bdev; + bio_set_op_attrs(bi, REQ_OP_WRITE, WRITE_FLUSH); atomic_inc(&mddev->flush_pending); - submit_bio(WRITE_FLUSH, bi); + submit_bio(bi); rcu_read_lock(); rdev_dec_pending(rdev, mddev); } @@ -413,7 +414,7 @@ static void md_submit_flush_data(struct work_struct *ws) /* an empty barrier - all done */ bio_endio(bio); else { - bio->bi_rw &= ~REQ_FLUSH; + bio->bi_rw &= ~REQ_PREFLUSH; mddev->pers->make_request(mddev, bio); } @@ -742,9 
+743,10 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, bio_add_page(bio, page, size, 0); bio->bi_private = rdev; bio->bi_end_io = super_written; + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA); atomic_inc(&mddev->pending_writes); - submit_bio(WRITE_FLUSH_FUA, bio); + submit_bio(bio); } void md_super_wait(struct mddev *mddev) @@ -754,13 +756,14 @@ void md_super_wait(struct mddev *mddev) } int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, - struct page *page, int rw, bool metadata_op) + struct page *page, int op, int op_flags, bool metadata_op) { struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev); int ret; bio->bi_bdev = (metadata_op && rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; + bio_set_op_attrs(bio, op, op_flags); if (metadata_op) bio->bi_iter.bi_sector = sector + rdev->sb_start; else if (rdev->mddev->reshape_position != MaxSector && @@ -770,7 +773,8 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, else bio->bi_iter.bi_sector = sector + rdev->data_offset; bio_add_page(bio, page, size, 0); - submit_bio_wait(rw, bio); + + submit_bio_wait(bio); ret = !bio->bi_error; bio_put(bio); @@ -785,7 +789,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size) if (rdev->sb_loaded) return 0; - if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true)) + if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true)) goto fail; rdev->sb_loaded = 1; return 0; @@ -1471,7 +1475,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ return -EINVAL; bb_sector = (long long)offset; if (!sync_page_io(rdev, bb_sector, sectors << 9, - rdev->bb_page, READ, true)) + rdev->bb_page, REQ_OP_READ, 0, true)) return -EIO; bbp = (u64 *)page_address(rdev->bb_page); rdev->badblocks.shift = sb->bblog_shift; diff --git a/drivers/md/md.h b/drivers/md/md.h index b5c4be73e6e4..b4f335245bd6 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -424,7 +424,7 @@ struct mddev { /* Generic flush handling. * The last to finish preflush schedules a worker to submit - * the rest of the request (without the REQ_FLUSH flag). + * the rest of the request (without the REQ_PREFLUSH flag). 
*/ struct bio *flush_bio; atomic_t flush_pending; @@ -618,7 +618,8 @@ extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev, sector_t sector, int size, struct page *page); extern void md_super_wait(struct mddev *mddev); extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, - struct page *page, int rw, bool metadata_op); + struct page *page, int op, int op_flags, + bool metadata_op); extern void md_do_sync(struct md_thread *thread); extern void md_new_event(struct mddev *mddev); extern int md_allow_write(struct mddev *mddev); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index dd483bb2e111..72ea98e89e57 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -111,7 +111,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio) struct multipath_bh * mp_bh; struct multipath_info *multipath; - if (unlikely(bio->bi_rw & REQ_FLUSH)) { + if (unlikely(bio->bi_rw & REQ_PREFLUSH)) { md_flush_request(mddev, bio); return; } diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index ea3d3b656fd0..2cc1877804c2 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -429,7 +429,14 @@ static int dm_btree_lookup_next_single(struct dm_btree_info *info, dm_block_t ro if (flags & INTERNAL_NODE) { i = lower_bound(n, key); - if (i < 0 || i >= nr_entries) { + if (i < 0) { + /* + * avoid early -ENODATA return when all entries are + * higher than the search @key. + */ + i = 0; + } + if (i >= nr_entries) { r = -ENODATA; goto out; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 34783a3c8b3c..c3d439083212 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -458,7 +458,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) struct md_rdev *tmp_dev; struct bio *split; - if (unlikely(bio->bi_rw & REQ_FLUSH)) { + if (unlikely(bio->bi_rw & REQ_PREFLUSH)) { md_flush_request(mddev, bio); return; } @@ -488,7 +488,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) split->bi_iter.bi_sector = sector + zone->dev_start + tmp_dev->data_offset; - if (unlikely((split->bi_rw & REQ_DISCARD) && + if (unlikely((bio_op(split) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { /* Just ignore it */ bio_endio(split); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c7c8cde0ab21..4e6da4497553 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -759,7 +759,7 @@ static void flush_pending_writes(struct r1conf *conf) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; bio->bi_next = NULL; - if (unlikely((bio->bi_rw & REQ_DISCARD) && + if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ bio_endio(bio); @@ -1033,7 +1033,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; bio->bi_next = NULL; - if (unlikely((bio->bi_rw & REQ_DISCARD) && + if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ bio_endio(bio); @@ -1053,12 +1053,11 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) int i, disks; struct bitmap *bitmap; unsigned long flags; + const int op = bio_op(bio); const int rw = bio_data_dir(bio); const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); - const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | 
REQ_FUA)); - const unsigned long do_discard = (bio->bi_rw - & (REQ_DISCARD | REQ_SECURE)); - const unsigned long do_same = (bio->bi_rw & REQ_WRITE_SAME); + const unsigned long do_flush_fua = (bio->bi_rw & + (REQ_PREFLUSH | REQ_FUA)); struct md_rdev *blocked_rdev; struct blk_plug_cb *cb; struct raid1_plug_cb *plug = NULL; @@ -1106,7 +1105,7 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) bitmap = mddev->bitmap; /* - * make_request() can abort the operation when READA is being + * make_request() can abort the operation when read-ahead is being * used and no empty request is available. * */ @@ -1166,7 +1165,7 @@ read_again: mirror->rdev->data_offset; read_bio->bi_bdev = mirror->rdev->bdev; read_bio->bi_end_io = raid1_end_read_request; - read_bio->bi_rw = READ | do_sync; + bio_set_op_attrs(read_bio, op, do_sync); read_bio->bi_private = r1_bio; if (max_sectors < r1_bio->sectors) { @@ -1376,8 +1375,7 @@ read_again: conf->mirrors[i].rdev->data_offset); mbio->bi_bdev = conf->mirrors[i].rdev->bdev; mbio->bi_end_io = raid1_end_write_request; - mbio->bi_rw = - WRITE | do_flush_fua | do_sync | do_discard | do_same; + bio_set_op_attrs(mbio, op, do_flush_fua | do_sync); mbio->bi_private = r1_bio; atomic_inc(&r1_bio->remaining); @@ -1771,7 +1769,7 @@ static void end_sync_write(struct bio *bio) static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector, int sectors, struct page *page, int rw) { - if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) + if (sync_page_io(rdev, sector, sectors << 9, page, rw, 0, false)) /* success */ return 1; if (rw == WRITE) { @@ -1825,7 +1823,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) rdev = conf->mirrors[d].rdev; if (sync_page_io(rdev, sect, s<<9, bio->bi_io_vec[idx].bv_page, - READ, false)) { + REQ_OP_READ, 0, false)) { success = 1; break; } @@ -2030,7 +2028,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)))) continue; - wbio->bi_rw = WRITE; + bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); wbio->bi_end_io = end_sync_write; atomic_inc(&r1_bio->remaining); md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio)); @@ -2090,7 +2088,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk, is_badblock(rdev, sect, s, &first_bad, &bad_sectors) == 0 && sync_page_io(rdev, sect, s<<9, - conf->tmppage, READ, false)) + conf->tmppage, REQ_OP_READ, 0, false)) success = 1; else { d++; @@ -2201,14 +2199,15 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) wbio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev); } - wbio->bi_rw = WRITE; + bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); wbio->bi_iter.bi_sector = r1_bio->sector; wbio->bi_iter.bi_size = r1_bio->sectors << 9; bio_trim(wbio, sector - r1_bio->sector, sectors); wbio->bi_iter.bi_sector += rdev->data_offset; wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) < 0) + + if (submit_bio_wait(wbio) < 0) /* failure! 
*/ ok = rdev_set_badblocks(rdev, sector, sectors, 0) @@ -2343,7 +2342,7 @@ read_more: bio->bi_iter.bi_sector = r1_bio->sector + rdev->data_offset; bio->bi_bdev = rdev->bdev; bio->bi_end_io = raid1_end_read_request; - bio->bi_rw = READ | do_sync; + bio_set_op_attrs(bio, REQ_OP_READ, do_sync); bio->bi_private = r1_bio; if (max_sectors < r1_bio->sectors) { /* Drat - have to split this up more */ @@ -2571,7 +2570,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (i < conf->raid_disks) still_degraded = 1; } else if (!test_bit(In_sync, &rdev->flags)) { - bio->bi_rw = WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_end_io = end_sync_write; write_targets ++; } else { @@ -2598,7 +2597,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (disk < 0) disk = i; } - bio->bi_rw = READ; + bio_set_op_attrs(bio, REQ_OP_READ, 0); bio->bi_end_io = end_sync_read; read_targets++; } else if (!test_bit(WriteErrorSeen, &rdev->flags) && @@ -2610,7 +2609,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, * if we are doing resync or repair. Otherwise, leave * this device alone for this sync request. */ - bio->bi_rw = WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_end_io = end_sync_write; write_targets++; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index c7de2a53e625..26ae74fd0d01 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -865,7 +865,7 @@ static void flush_pending_writes(struct r10conf *conf) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; bio->bi_next = NULL; - if (unlikely((bio->bi_rw & REQ_DISCARD) && + if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ bio_endio(bio); @@ -1041,7 +1041,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; bio->bi_next = NULL; - if (unlikely((bio->bi_rw & REQ_DISCARD) && + if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ bio_endio(bio); @@ -1058,12 +1058,10 @@ static void __make_request(struct mddev *mddev, struct bio *bio) struct r10bio *r10_bio; struct bio *read_bio; int i; + const int op = bio_op(bio); const int rw = bio_data_dir(bio); const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); const unsigned long do_fua = (bio->bi_rw & REQ_FUA); - const unsigned long do_discard = (bio->bi_rw - & (REQ_DISCARD | REQ_SECURE)); - const unsigned long do_same = (bio->bi_rw & REQ_WRITE_SAME); unsigned long flags; struct md_rdev *blocked_rdev; struct blk_plug_cb *cb; @@ -1156,7 +1154,7 @@ read_again: choose_data_offset(r10_bio, rdev); read_bio->bi_bdev = rdev->bdev; read_bio->bi_end_io = raid10_end_read_request; - read_bio->bi_rw = READ | do_sync; + bio_set_op_attrs(read_bio, op, do_sync); read_bio->bi_private = r10_bio; if (max_sectors < r10_bio->sectors) { @@ -1363,8 +1361,7 @@ retry_write: rdev)); mbio->bi_bdev = rdev->bdev; mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = - WRITE | do_sync | do_fua | do_discard | do_same; + bio_set_op_attrs(mbio, op, do_sync | do_fua); mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); @@ -1406,8 +1403,7 @@ retry_write: r10_bio, rdev)); mbio->bi_bdev = rdev->bdev; mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = - WRITE | do_sync | do_fua | do_discard | do_same; + bio_set_op_attrs(mbio, op, do_sync | do_fua); 
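			/*
			 * bio_set_op_attrs() packs the REQ_OP_* operation and
			 * its flags into bi_rw in one step; with the op held
			 * in the high bits, callers test the operation with
			 * bio_op() rather than masking bi_rw, which is why the
			 * REQ_DISCARD checks in this series became
			 * bio_op(bio) == REQ_OP_DISCARD.
			 */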
mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); @@ -1450,7 +1446,7 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) struct bio *split; - if (unlikely(bio->bi_rw & REQ_FLUSH)) { + if (unlikely(bio->bi_rw & REQ_PREFLUSH)) { md_flush_request(mddev, bio); return; } @@ -1992,10 +1988,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) tbio->bi_vcnt = vcnt; tbio->bi_iter.bi_size = fbio->bi_iter.bi_size; - tbio->bi_rw = WRITE; tbio->bi_private = r10_bio; tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; tbio->bi_end_io = end_sync_write; + bio_set_op_attrs(tbio, REQ_OP_WRITE, 0); bio_copy_data(tbio, fbio); @@ -2078,7 +2074,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio) addr, s << 9, bio->bi_io_vec[idx].bv_page, - READ, false); + REQ_OP_READ, 0, false); if (ok) { rdev = conf->mirrors[dw].rdev; addr = r10_bio->devs[1].addr + sect; @@ -2086,7 +2082,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio) addr, s << 9, bio->bi_io_vec[idx].bv_page, - WRITE, false); + REQ_OP_WRITE, 0, false); if (!ok) { set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, @@ -2213,7 +2209,7 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags))) return -1; - if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) + if (sync_page_io(rdev, sector, sectors << 9, page, rw, 0, false)) /* success */ return 1; if (rw == WRITE) { @@ -2299,7 +2295,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 r10_bio->devs[sl].addr + sect, s<<9, - conf->tmppage, READ, false); + conf->tmppage, + REQ_OP_READ, 0, false); rdev_dec_pending(rdev, mddev); rcu_read_lock(); if (success) @@ -2474,7 +2471,9 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) choose_data_offset(r10_bio, rdev) + (sector - r10_bio->sector)); wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) < 0) + bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); + + if (submit_bio_wait(wbio) < 0) /* Failure! 
*/ ok = rdev_set_badblocks(rdev, sector, sectors, 0) @@ -2548,7 +2547,7 @@ read_more: bio->bi_iter.bi_sector = r10_bio->devs[slot].addr + choose_data_offset(r10_bio, rdev); bio->bi_bdev = rdev->bdev; - bio->bi_rw = READ | do_sync; + bio_set_op_attrs(bio, REQ_OP_READ, do_sync); bio->bi_private = r10_bio; bio->bi_end_io = raid10_end_read_request; if (max_sectors < r10_bio->sectors) { @@ -3038,7 +3037,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, biolist = bio; bio->bi_private = r10_bio; bio->bi_end_io = end_sync_read; - bio->bi_rw = READ; + bio_set_op_attrs(bio, REQ_OP_READ, 0); from_addr = r10_bio->devs[j].addr; bio->bi_iter.bi_sector = from_addr + rdev->data_offset; @@ -3064,7 +3063,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, biolist = bio; bio->bi_private = r10_bio; bio->bi_end_io = end_sync_write; - bio->bi_rw = WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_iter.bi_sector = to_addr + rdev->data_offset; bio->bi_bdev = rdev->bdev; @@ -3093,7 +3092,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, biolist = bio; bio->bi_private = r10_bio; bio->bi_end_io = end_sync_write; - bio->bi_rw = WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_iter.bi_sector = to_addr + rdev->data_offset; bio->bi_bdev = rdev->bdev; @@ -3213,7 +3212,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, biolist = bio; bio->bi_private = r10_bio; bio->bi_end_io = end_sync_read; - bio->bi_rw = READ; + bio_set_op_attrs(bio, REQ_OP_READ, 0); bio->bi_iter.bi_sector = sector + conf->mirrors[d].rdev->data_offset; bio->bi_bdev = conf->mirrors[d].rdev->bdev; @@ -3235,7 +3234,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, biolist = bio; bio->bi_private = r10_bio; bio->bi_end_io = end_sync_write; - bio->bi_rw = WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_iter.bi_sector = sector + conf->mirrors[d].replacement->data_offset; bio->bi_bdev = conf->mirrors[d].replacement->bdev; @@ -4320,7 +4319,7 @@ read_more: + rdev->data_offset); read_bio->bi_private = r10_bio; read_bio->bi_end_io = end_sync_read; - read_bio->bi_rw = READ; + bio_set_op_attrs(read_bio, REQ_OP_READ, 0); read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); read_bio->bi_error = 0; read_bio->bi_vcnt = 0; @@ -4354,7 +4353,7 @@ read_more: rdev2->new_data_offset; b->bi_private = r10_bio; b->bi_end_io = end_reshape_write; - b->bi_rw = WRITE; + bio_set_op_attrs(b, REQ_OP_WRITE, 0); b->bi_next = blist; blist = b; } @@ -4522,7 +4521,7 @@ static int handle_reshape_read_error(struct mddev *mddev, addr, s << 9, bvec[idx].bv_page, - READ, false); + REQ_OP_READ, 0, false); if (success) break; failed: diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index e889e2deb7b3..5504ce2bac06 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -254,14 +254,14 @@ static void r5l_submit_current_io(struct r5l_log *log) __r5l_set_io_unit_state(io, IO_UNIT_IO_START); spin_unlock_irqrestore(&log->io_list_lock, flags); - submit_bio(WRITE, io->current_bio); + submit_bio(io->current_bio); } static struct bio *r5l_bio_alloc(struct r5l_log *log) { struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, log->bs); - bio->bi_rw = WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_bdev = log->rdev->bdev; bio->bi_iter.bi_sector = log->rdev->data_offset + log->log_start; @@ -373,7 +373,7 @@ static void r5l_append_payload_page(struct r5l_log *log, struct page *page) 
io->current_bio = r5l_bio_alloc(log); bio_chain(io->current_bio, prev); - submit_bio(WRITE, prev); + submit_bio(prev); } if (!bio_add_page(io->current_bio, page, PAGE_SIZE, 0)) @@ -536,7 +536,7 @@ int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio) bio_endio(bio); return 0; } - bio->bi_rw &= ~REQ_FLUSH; + bio->bi_rw &= ~REQ_PREFLUSH; return -EAGAIN; } @@ -686,7 +686,8 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log) bio_reset(&log->flush_bio); log->flush_bio.bi_bdev = log->rdev->bdev; log->flush_bio.bi_end_io = r5l_log_flush_endio; - submit_bio(WRITE_FLUSH, &log->flush_bio); + bio_set_op_attrs(&log->flush_bio, REQ_OP_WRITE, WRITE_FLUSH); + submit_bio(&log->flush_bio); } static void r5l_write_super(struct r5l_log *log, sector_t cp); @@ -881,7 +882,8 @@ static int r5l_read_meta_block(struct r5l_log *log, struct r5l_meta_block *mb; u32 crc, stored_crc; - if (!sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page, READ, false)) + if (!sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page, REQ_OP_READ, 0, + false)) return -EIO; mb = page_address(page); @@ -926,7 +928,8 @@ static int r5l_recovery_flush_one_stripe(struct r5l_log *log, &disk_index, sh); sync_page_io(log->rdev, *log_offset, PAGE_SIZE, - sh->dev[disk_index].page, READ, false); + sh->dev[disk_index].page, REQ_OP_READ, 0, + false); sh->dev[disk_index].log_checksum = le32_to_cpu(payload->checksum[0]); set_bit(R5_Wantwrite, &sh->dev[disk_index].flags); @@ -934,7 +937,8 @@ static int r5l_recovery_flush_one_stripe(struct r5l_log *log, } else { disk_index = sh->pd_idx; sync_page_io(log->rdev, *log_offset, PAGE_SIZE, - sh->dev[disk_index].page, READ, false); + sh->dev[disk_index].page, REQ_OP_READ, 0, + false); sh->dev[disk_index].log_checksum = le32_to_cpu(payload->checksum[0]); set_bit(R5_Wantwrite, &sh->dev[disk_index].flags); @@ -944,7 +948,7 @@ static int r5l_recovery_flush_one_stripe(struct r5l_log *log, sync_page_io(log->rdev, r5l_ring_add(log, *log_offset, BLOCK_SECTORS), PAGE_SIZE, sh->dev[disk_index].page, - READ, false); + REQ_OP_READ, 0, false); sh->dev[disk_index].log_checksum = le32_to_cpu(payload->checksum[1]); set_bit(R5_Wantwrite, @@ -986,11 +990,13 @@ static int r5l_recovery_flush_one_stripe(struct r5l_log *log, rdev = rcu_dereference(conf->disks[disk_index].rdev); if (rdev) sync_page_io(rdev, stripe_sect, PAGE_SIZE, - sh->dev[disk_index].page, WRITE, false); + sh->dev[disk_index].page, REQ_OP_WRITE, 0, + false); rrdev = rcu_dereference(conf->disks[disk_index].replacement); if (rrdev) sync_page_io(rrdev, stripe_sect, PAGE_SIZE, - sh->dev[disk_index].page, WRITE, false); + sh->dev[disk_index].page, REQ_OP_WRITE, 0, + false); } raid5_release_stripe(sh); return 0; @@ -1062,7 +1068,8 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos, crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE); mb->checksum = cpu_to_le32(crc); - if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, WRITE_FUA, false)) { + if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE, + WRITE_FUA, false)) { __free_page(page); return -EIO; } @@ -1137,7 +1144,7 @@ static int r5l_load_log(struct r5l_log *log) if (!page) return -ENOMEM; - if (!sync_page_io(rdev, cp, PAGE_SIZE, page, READ, false)) { + if (!sync_page_io(rdev, cp, PAGE_SIZE, page, REQ_OP_READ, 0, false)) { ret = -EIO; goto ioerr; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8959e6dd31dd..6953d78297b0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -806,7 +806,8 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct 
stripe_head *sh dd_idx = 0; while (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) dd_idx++; - if (head->dev[dd_idx].towrite->bi_rw != sh->dev[dd_idx].towrite->bi_rw) + if (head->dev[dd_idx].towrite->bi_rw != sh->dev[dd_idx].towrite->bi_rw || + bio_op(head->dev[dd_idx].towrite) != bio_op(sh->dev[dd_idx].towrite)) goto unlock_out; if (head->batch_head) { @@ -891,29 +892,28 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) if (r5l_write_stripe(conf->log, sh) == 0) return; for (i = disks; i--; ) { - int rw; + int op, op_flags = 0; int replace_only = 0; struct bio *bi, *rbi; struct md_rdev *rdev, *rrdev = NULL; sh = head_sh; if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { + op = REQ_OP_WRITE; if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) - rw = WRITE_FUA; - else - rw = WRITE; + op_flags = WRITE_FUA; if (test_bit(R5_Discard, &sh->dev[i].flags)) - rw |= REQ_DISCARD; + op = REQ_OP_DISCARD; } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) - rw = READ; + op = REQ_OP_READ; else if (test_and_clear_bit(R5_WantReplace, &sh->dev[i].flags)) { - rw = WRITE; + op = REQ_OP_WRITE; replace_only = 1; } else continue; if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags)) - rw |= REQ_SYNC; + op_flags |= REQ_SYNC; again: bi = &sh->dev[i].req; @@ -927,7 +927,7 @@ again: rdev = rrdev; rrdev = NULL; } - if (rw & WRITE) { + if (op_is_write(op)) { if (replace_only) rdev = NULL; if (rdev == rrdev) @@ -953,7 +953,7 @@ again: * need to check for writes. We never accept write errors * on the replacement, so we don't to check rrdev. */ - while ((rw & WRITE) && rdev && + while (op_is_write(op) && rdev && test_bit(WriteErrorSeen, &rdev->flags)) { sector_t first_bad; int bad_sectors; @@ -995,13 +995,13 @@ again: bio_reset(bi); bi->bi_bdev = rdev->bdev; - bi->bi_rw = rw; - bi->bi_end_io = (rw & WRITE) + bio_set_op_attrs(bi, op, op_flags); + bi->bi_end_io = op_is_write(op) ? raid5_end_write_request : raid5_end_read_request; bi->bi_private = sh; - pr_debug("%s: for %llu schedule op %ld on disc %d\n", + pr_debug("%s: for %llu schedule op %d on disc %d\n", __func__, (unsigned long long)sh->sector, bi->bi_rw, i); atomic_inc(&sh->count); @@ -1027,7 +1027,7 @@ again: * If this is discard request, set bi_vcnt 0. We don't * want to confuse SCSI because SCSI will replace payload */ - if (rw & REQ_DISCARD) + if (op == REQ_OP_DISCARD) bi->bi_vcnt = 0; if (rrdev) set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); @@ -1047,12 +1047,12 @@ again: bio_reset(rbi); rbi->bi_bdev = rrdev->bdev; - rbi->bi_rw = rw; - BUG_ON(!(rw & WRITE)); + bio_set_op_attrs(rbi, op, op_flags); + BUG_ON(!op_is_write(op)); rbi->bi_end_io = raid5_end_write_request; rbi->bi_private = sh; - pr_debug("%s: for %llu schedule op %ld on " + pr_debug("%s: for %llu schedule op %d on " "replacement disc %d\n", __func__, (unsigned long long)sh->sector, rbi->bi_rw, i); @@ -1076,7 +1076,7 @@ again: * If this is discard request, set bi_vcnt 0. 
We don't * want to confuse SCSI because SCSI will replace payload */ - if (rw & REQ_DISCARD) + if (op == REQ_OP_DISCARD) rbi->bi_vcnt = 0; if (conf->mddev->gendisk) trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), @@ -1085,9 +1085,9 @@ again: generic_make_request(rbi); } if (!rdev && !rrdev) { - if (rw & WRITE) + if (op_is_write(op)) set_bit(STRIPE_DEGRADED, &sh->state); - pr_debug("skip op %ld on disc %d for sector %llu\n", + pr_debug("skip op %d on disc %d for sector %llu\n", bi->bi_rw, i, (unsigned long long)sh->sector); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); @@ -1623,7 +1623,7 @@ again: set_bit(R5_WantFUA, &dev->flags); if (wbi->bi_rw & REQ_SYNC) set_bit(R5_SyncIO, &dev->flags); - if (wbi->bi_rw & REQ_DISCARD) + if (bio_op(wbi) == REQ_OP_DISCARD) set_bit(R5_Discard, &dev->flags); else { tx = async_copy_data(1, wbi, &dev->page, @@ -5150,7 +5150,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) DEFINE_WAIT(w); bool do_prepare; - if (unlikely(bi->bi_rw & REQ_FLUSH)) { + if (unlikely(bi->bi_rw & REQ_PREFLUSH)) { int ret = r5l_handle_flush_request(conf->log, bi); if (ret == 0) @@ -5176,7 +5176,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) return; } - if (unlikely(bi->bi_rw & REQ_DISCARD)) { + if (unlikely(bio_op(bi) == REQ_OP_DISCARD)) { make_discard_request(mddev, bi); return; } @@ -5233,7 +5233,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) (unsigned long long)logical_sector); sh = raid5_get_active_stripe(conf, new_sector, previous, - (bi->bi_rw&RWA_MASK), 0); + (bi->bi_rw & REQ_RAHEAD), 0); if (sh) { if (unlikely(previous)) { /* expansion might have moved on while waiting for a diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig index a8518fb3bca7..962f2a9a6614 100644 --- a/drivers/media/Kconfig +++ b/drivers/media/Kconfig @@ -80,6 +80,9 @@ config MEDIA_RC_SUPPORT Say Y when you have a TV or an IR device. +config MEDIA_CEC_EDID + bool + # # Media controller # Selectable only for webcam/grabbers, as other drivers don't use it diff --git a/drivers/media/Makefile b/drivers/media/Makefile index e608bbce0c35..081a7866fd44 100644 --- a/drivers/media/Makefile +++ b/drivers/media/Makefile @@ -2,6 +2,10 @@ # Makefile for the kernel multimedia device drivers. # +ifeq ($(CONFIG_MEDIA_CEC_EDID),y) + obj-$(CONFIG_MEDIA_SUPPORT) += cec-edid.o +endif + media-objs := media-device.o media-devnode.o media-entity.o # diff --git a/drivers/media/cec-edid.c b/drivers/media/cec-edid.c new file mode 100644 index 000000000000..70018247bdda --- /dev/null +++ b/drivers/media/cec-edid.c @@ -0,0 +1,168 @@ +/* + * cec-edid - HDMI Consumer Electronics Control EDID & CEC helper functions + * + * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include + +/* + * This EDID is expected to be a CEA-861 compliant, which means that there are + * at least two blocks and one or more of the extensions blocks are CEA-861 + * blocks. + * + * The returned location is guaranteed to be < size - 1. + */ +static unsigned int cec_get_edid_spa_location(const u8 *edid, unsigned int size) +{ + unsigned int blocks = size / 128; + unsigned int block; + u8 d; + + /* Sanity check: at least 2 blocks and a multiple of the block size */ + if (blocks < 2 || size % 128) + return 0; + + /* + * If there are fewer extension blocks than the size, then update + * 'blocks'. It is allowed to have more extension blocks than the size, + * since some hardware can only read e.g. 256 bytes of the EDID, even + * though more blocks are present. The first CEA-861 extension block + * should normally be in block 1 anyway. + */ + if (edid[0x7e] + 1 < blocks) + blocks = edid[0x7e] + 1; + + for (block = 1; block < blocks; block++) { + unsigned int offset = block * 128; + + /* Skip any non-CEA-861 extension blocks */ + if (edid[offset] != 0x02 || edid[offset + 1] != 0x03) + continue; + + /* search Vendor Specific Data Block (tag 3) */ + d = edid[offset + 2] & 0x7f; + /* Check if there are Data Blocks */ + if (d <= 4) + continue; + if (d > 4) { + unsigned int i = offset + 4; + unsigned int end = offset + d; + + /* Note: 'end' is always < 'size' */ + do { + u8 tag = edid[i] >> 5; + u8 len = edid[i] & 0x1f; + + if (tag == 3 && len >= 5 && i + len <= end) + return i + 4; + i += len + 1; + } while (i < end); + } + } + return 0; +} + +u16 cec_get_edid_phys_addr(const u8 *edid, unsigned int size, + unsigned int *offset) +{ + unsigned int loc = cec_get_edid_spa_location(edid, size); + + if (offset) + *offset = loc; + if (loc == 0) + return CEC_PHYS_ADDR_INVALID; + return (edid[loc] << 8) | edid[loc + 1]; +} +EXPORT_SYMBOL_GPL(cec_get_edid_phys_addr); + +void cec_set_edid_phys_addr(u8 *edid, unsigned int size, u16 phys_addr) +{ + unsigned int loc = cec_get_edid_spa_location(edid, size); + u8 sum = 0; + unsigned int i; + + if (loc == 0) + return; + edid[loc] = phys_addr >> 8; + edid[loc + 1] = phys_addr & 0xff; + loc &= ~0x7f; + + /* update the checksum */ + for (i = loc; i < loc + 127; i++) + sum += edid[i]; + edid[i] = 256 - sum; +} +EXPORT_SYMBOL_GPL(cec_set_edid_phys_addr); + +u16 cec_phys_addr_for_input(u16 phys_addr, u8 input) +{ + /* Check if input is sane */ + if (WARN_ON(input == 0 || input > 0xf)) + return CEC_PHYS_ADDR_INVALID; + + if (phys_addr == 0) + return input << 12; + + if ((phys_addr & 0x0fff) == 0) + return phys_addr | (input << 8); + + if ((phys_addr & 0x00ff) == 0) + return phys_addr | (input << 4); + + if ((phys_addr & 0x000f) == 0) + return phys_addr | input; + + /* + * All nibbles are used so no valid physical addresses can be assigned + * to the input. 
+ */ + return CEC_PHYS_ADDR_INVALID; +} +EXPORT_SYMBOL_GPL(cec_phys_addr_for_input); + +int cec_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port) +{ + int i; + + if (parent) + *parent = phys_addr; + if (port) + *port = 0; + if (phys_addr == CEC_PHYS_ADDR_INVALID) + return 0; + for (i = 0; i < 16; i += 4) + if (phys_addr & (0xf << i)) + break; + if (i == 16) + return 0; + if (parent) + *parent = phys_addr & (0xfff0 << i); + if (port) + *port = (phys_addr >> i) & 0xf; + for (i += 4; i < 16; i += 4) + if ((phys_addr & (0xf << i)) == 0) + return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(cec_phys_addr_validate); + +MODULE_AUTHOR("Hans Verkuil "); +MODULE_DESCRIPTION("CEC EDID helper functions"); +MODULE_LICENSE("GPL"); diff --git a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c index cf1dadd0be9e..3ec3cebe62b9 100644 --- a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c +++ b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c @@ -777,7 +777,7 @@ static void precalculate_color(struct tpg_data *tpg, int k) * Remember that r, g and b are still in the 0 - 0xff0 range. */ if (tpg->real_rgb_range == V4L2_DV_RGB_RANGE_LIMITED && - tpg->rgb_range == V4L2_DV_RGB_RANGE_FULL) { + tpg->rgb_range == V4L2_DV_RGB_RANGE_FULL && !tpg->is_yuv) { /* * Convert from full range (which is what r, g and b are) * to limited range (which is the 'real' RGB range), which @@ -787,7 +787,7 @@ static void precalculate_color(struct tpg_data *tpg, int k) g = (g * 219) / 255 + (16 << 4); b = (b * 219) / 255 + (16 << 4); } else if (tpg->real_rgb_range != V4L2_DV_RGB_RANGE_LIMITED && - tpg->rgb_range == V4L2_DV_RGB_RANGE_LIMITED) { + tpg->rgb_range == V4L2_DV_RGB_RANGE_LIMITED && !tpg->is_yuv) { /* * Clamp r, g and b to the limited range and convert to full * range since that's what we deliver. 
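The cec-edid helpers above encode an HDMI topology in four 4-bit nibbles: the root display is 0.0.0.0, an input N of a device at a.b.0.0 hangs off it at a.b.N.0, and no further address can be assigned once all four nibbles are in use. A small standalone sketch of the same nibble arithmetic as cec_phys_addr_for_input() (hypothetical userspace names, not the kernel API):

#include <stdio.h>
#include <stdint.h>

#define PHYS_ADDR_INVALID 0xffff

/* Append the input number in the first free (zero) nibble, failing
 * once the tree is already four levels deep. */
static uint16_t phys_addr_for_input(uint16_t pa, uint8_t input)
{
        if (input == 0 || input > 0xf)
                return PHYS_ADDR_INVALID;
        if (pa == 0)
                return input << 12;             /* 0.0.0.0 -> N.0.0.0 */
        if ((pa & 0x0fff) == 0)
                return pa | (input << 8);       /* a.0.0.0 -> a.N.0.0 */
        if ((pa & 0x00ff) == 0)
                return pa | (input << 4);       /* a.b.0.0 -> a.b.N.0 */
        if ((pa & 0x000f) == 0)
                return pa | input;              /* a.b.c.0 -> a.b.c.N */
        return PHYS_ADDR_INVALID;               /* all nibbles used */
}

int main(void)
{
        /* A receiver on the TV's input 2 sits at 2.0.0.0; a player on
         * that receiver's input 1 then gets 2.1.0.0. */
        uint16_t rx = phys_addr_for_input(0x0000, 2);

        printf("%04x %04x\n", rx, phys_addr_for_input(rx, 1));
        return 0;
}

Running it prints "2000 2100", matching the a.b.c.d dotted notation used throughout the CEC specification.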
diff --git a/drivers/media/dvb-core/demux.h b/drivers/media/dvb-core/demux.h index 6d3b95b8939d..7f1dffef4353 100644 --- a/drivers/media/dvb-core/demux.h +++ b/drivers/media/dvb-core/demux.h @@ -143,7 +143,7 @@ struct dmx_ts_feed { int type, enum dmx_ts_pes pes_type, size_t circular_buffer_size, - struct timespec timeout); + ktime_t timeout); int (*start_filtering)(struct dmx_ts_feed *feed); int (*stop_filtering)(struct dmx_ts_feed *feed); }; diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index a168cbe1c998..7b67e1dd97fd 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -556,7 +556,7 @@ static int dvb_dmxdev_start_feed(struct dmxdev *dmxdev, struct dmxdev_filter *filter, struct dmxdev_feed *feed) { - struct timespec timeout = { 0 }; + ktime_t timeout = ktime_set(0, 0); struct dmx_pes_filter_params *para = &filter->params.pes; dmx_output_t otype; int ret; diff --git a/drivers/media/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb-core/dvb_ca_en50221.c index f82cd1ff4f3a..b5b5b195ea7f 100644 --- a/drivers/media/dvb-core/dvb_ca_en50221.c +++ b/drivers/media/dvb-core/dvb_ca_en50221.c @@ -123,6 +123,7 @@ struct dvb_ca_slot { /* Private CA-interface information */ struct dvb_ca_private { + struct kref refcount; /* pointer back to the public data structure */ struct dvb_ca_en50221 *pub; @@ -161,6 +162,34 @@ struct dvb_ca_private { struct mutex ioctl_mutex; }; +static void dvb_ca_private_free(struct dvb_ca_private *ca) +{ + unsigned int i; + + dvb_unregister_device(ca->dvbdev); + for (i = 0; i < ca->slot_count; i++) + vfree(ca->slot_info[i].rx_buffer.data); + + kfree(ca->slot_info); + kfree(ca); +} + +static void dvb_ca_private_release(struct kref *ref) +{ + struct dvb_ca_private *ca = container_of(ref, struct dvb_ca_private, refcount); + dvb_ca_private_free(ca); +} + +static void dvb_ca_private_get(struct dvb_ca_private *ca) +{ + kref_get(&ca->refcount); +} + +static void dvb_ca_private_put(struct dvb_ca_private *ca) +{ + kref_put(&ca->refcount, dvb_ca_private_release); +} + static void dvb_ca_en50221_thread_wakeup(struct dvb_ca_private *ca); static int dvb_ca_en50221_read_data(struct dvb_ca_private *ca, int slot, u8 * ebuf, int ecount); static int dvb_ca_en50221_write_data(struct dvb_ca_private *ca, int slot, u8 * ebuf, int ecount); @@ -1558,6 +1587,8 @@ static int dvb_ca_en50221_io_open(struct inode *inode, struct file *file) dvb_ca_en50221_thread_update_delay(ca); dvb_ca_en50221_thread_wakeup(ca); + dvb_ca_private_get(ca); + return 0; } @@ -1586,6 +1617,8 @@ static int dvb_ca_en50221_io_release(struct inode *inode, struct file *file) module_put(ca->pub->owner); + dvb_ca_private_put(ca); + return err; } @@ -1681,6 +1714,7 @@ int dvb_ca_en50221_init(struct dvb_adapter *dvb_adapter, ret = -ENOMEM; goto exit; } + kref_init(&ca->refcount); ca->pub = pubca; ca->flags = flags; ca->slot_count = slot_count; @@ -1759,10 +1793,7 @@ void dvb_ca_en50221_release(struct dvb_ca_en50221 *pubca) for (i = 0; i < ca->slot_count; i++) { dvb_ca_en50221_slot_shutdown(ca, i); - vfree(ca->slot_info[i].rx_buffer.data); } - kfree(ca->slot_info); - dvb_unregister_device(ca->dvbdev); - kfree(ca); + dvb_ca_private_put(ca); pubca->private = NULL; } diff --git a/drivers/media/dvb-core/dvb_demux.c b/drivers/media/dvb-core/dvb_demux.c index 0cc5e935166c..a0cf7b0d03e8 100644 --- a/drivers/media/dvb-core/dvb_demux.c +++ b/drivers/media/dvb-core/dvb_demux.c @@ -398,28 +398,23 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf) int dvr_done 
= 0; if (dvb_demux_speedcheck) { - struct timespec cur_time, delta_time; + ktime_t cur_time; u64 speed_bytes, speed_timedelta; demux->speed_pkts_cnt++; /* show speed every SPEED_PKTS_INTERVAL packets */ if (!(demux->speed_pkts_cnt % SPEED_PKTS_INTERVAL)) { - cur_time = current_kernel_time(); + cur_time = ktime_get(); - if (demux->speed_last_time.tv_sec != 0 && - demux->speed_last_time.tv_nsec != 0) { - delta_time = timespec_sub(cur_time, - demux->speed_last_time); + if (ktime_to_ns(demux->speed_last_time) != 0) { speed_bytes = (u64)demux->speed_pkts_cnt * 188 * 8; /* convert to 1024 basis */ speed_bytes = 1000 * div64_u64(speed_bytes, 1024); - speed_timedelta = - (u64)timespec_to_ns(&delta_time); - speed_timedelta = div64_u64(speed_timedelta, - 1000000); /* nsec -> usec */ + speed_timedelta = ktime_ms_delta(cur_time, + demux->speed_last_time); printk(KERN_INFO "TS speed %llu Kbits/sec \n", div64_u64(speed_bytes, speed_timedelta)); @@ -666,7 +661,7 @@ out: static int dmx_ts_feed_set(struct dmx_ts_feed *ts_feed, u16 pid, int ts_type, enum dmx_ts_pes pes_type, - size_t circular_buffer_size, struct timespec timeout) + size_t circular_buffer_size, ktime_t timeout) { struct dvb_demux_feed *feed = (struct dvb_demux_feed *)ts_feed; struct dvb_demux *demux = feed->demux; diff --git a/drivers/media/dvb-core/dvb_demux.h b/drivers/media/dvb-core/dvb_demux.h index ae7fc33c3231..5ed3cab4ad28 100644 --- a/drivers/media/dvb-core/dvb_demux.h +++ b/drivers/media/dvb-core/dvb_demux.h @@ -83,7 +83,7 @@ struct dvb_demux_feed { u8 *buffer; int buffer_size; - struct timespec timeout; + ktime_t timeout; struct dvb_demux_filter *filter; int ts_type; @@ -134,7 +134,7 @@ struct dvb_demux { uint8_t *cnt_storage; /* for TS continuity check */ - struct timespec speed_last_time; /* for TS speed check */ + ktime_t speed_last_time; /* for TS speed check */ uint32_t speed_pkts_cnt; /* for TS speed check */ }; diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index c0142614c408..be99c8dbc5f8 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -99,6 +99,7 @@ MODULE_PARM_DESC(dvb_mfe_wait_time, "Wait up to seconds on open( static DEFINE_MUTEX(frontend_mutex); struct dvb_frontend_private { + struct kref refcount; /* thread/frontend values */ struct dvb_device *dvbdev; @@ -137,6 +138,23 @@ struct dvb_frontend_private { #endif }; +static void dvb_frontend_private_free(struct kref *ref) +{ + struct dvb_frontend_private *fepriv = + container_of(ref, struct dvb_frontend_private, refcount); + kfree(fepriv); +} + +static void dvb_frontend_private_put(struct dvb_frontend_private *fepriv) +{ + kref_put(&fepriv->refcount, dvb_frontend_private_free); +} + +static void dvb_frontend_private_get(struct dvb_frontend_private *fepriv) +{ + kref_get(&fepriv->refcount); +} + static void dvb_frontend_wakeup(struct dvb_frontend *fe); static int dtv_get_frontend(struct dvb_frontend *fe, struct dtv_frontend_properties *c, @@ -2543,6 +2561,8 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) fepriv->events.eventr = fepriv->events.eventw = 0; } + dvb_frontend_private_get(fepriv); + if (adapter->mfe_shared) mutex_unlock (&adapter->mfe_lock); return ret; @@ -2591,6 +2611,8 @@ static int dvb_frontend_release(struct inode *inode, struct file *file) fe->ops.ts_bus_ctrl(fe, 0); } + dvb_frontend_private_put(fepriv); + return ret; } @@ -2679,6 +2701,8 @@ int dvb_register_frontend(struct dvb_adapter* dvb, } fepriv = fe->frontend_priv; + 
kref_init(&fepriv->refcount); + sema_init(&fepriv->sem, 1); init_waitqueue_head (&fepriv->wait_queue); init_waitqueue_head (&fepriv->events.wait_queue); @@ -2713,18 +2737,11 @@ int dvb_unregister_frontend(struct dvb_frontend* fe) mutex_lock(&frontend_mutex); dvb_frontend_stop (fe); - mutex_unlock(&frontend_mutex); - - if (fepriv->dvbdev->users < -1) - wait_event(fepriv->dvbdev->wait_queue, - fepriv->dvbdev->users==-1); - - mutex_lock(&frontend_mutex); dvb_unregister_device (fepriv->dvbdev); /* fe is invalid now */ - kfree(fepriv); mutex_unlock(&frontend_mutex); + dvb_frontend_private_put(fepriv); return 0; } EXPORT_SYMBOL(dvb_unregister_frontend); diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index ce6a711b42d4..9914f69a4a02 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -997,7 +997,7 @@ static int dvb_net_feed_start(struct net_device *dev) netdev_dbg(dev, "start filtering\n"); priv->secfeed->start_filtering(priv->secfeed); } else if (priv->feedtype == DVB_NET_FEEDTYPE_ULE) { - struct timespec timeout = { 0, 10000000 }; // 10 msec + ktime_t timeout = ns_to_ktime(10 * NSEC_PER_MSEC); /* we have payloads encapsulated in TS */ netdev_dbg(dev, "alloc tsfeed\n"); diff --git a/drivers/media/dvb-core/dvb_ringbuffer.c b/drivers/media/dvb-core/dvb_ringbuffer.c index 1100e98a7b1d..7df7fb3738a0 100644 --- a/drivers/media/dvb-core/dvb_ringbuffer.c +++ b/drivers/media/dvb-core/dvb_ringbuffer.c @@ -55,7 +55,13 @@ void dvb_ringbuffer_init(struct dvb_ringbuffer *rbuf, void *data, size_t len) int dvb_ringbuffer_empty(struct dvb_ringbuffer *rbuf) { - return (rbuf->pread==rbuf->pwrite); + /* smp_load_acquire() to load write pointer on reader side + * this pairs with smp_store_release() in dvb_ringbuffer_write(), + * dvb_ringbuffer_write_user(), or dvb_ringbuffer_reset() + * + * for memory barriers also see Documentation/circular-buffers.txt + */ + return (rbuf->pread == smp_load_acquire(&rbuf->pwrite)); } @@ -64,7 +70,12 @@ ssize_t dvb_ringbuffer_free(struct dvb_ringbuffer *rbuf) { ssize_t free; - free = rbuf->pread - rbuf->pwrite; + /* ACCESS_ONCE() to load read pointer on writer side + * this pairs with smp_store_release() in dvb_ringbuffer_read(), + * dvb_ringbuffer_read_user(), dvb_ringbuffer_flush(), + * or dvb_ringbuffer_reset() + */ + free = ACCESS_ONCE(rbuf->pread) - rbuf->pwrite; if (free <= 0) free += rbuf->size; return free-1; @@ -76,7 +87,11 @@ ssize_t dvb_ringbuffer_avail(struct dvb_ringbuffer *rbuf) { ssize_t avail; - avail = rbuf->pwrite - rbuf->pread; + /* smp_load_acquire() to load write pointer on reader side + * this pairs with smp_store_release() in dvb_ringbuffer_write(), + * dvb_ringbuffer_write_user(), or dvb_ringbuffer_reset() + */ + avail = smp_load_acquire(&rbuf->pwrite) - rbuf->pread; if (avail < 0) avail += rbuf->size; return avail; @@ -86,14 +101,25 @@ ssize_t dvb_ringbuffer_avail(struct dvb_ringbuffer *rbuf) void dvb_ringbuffer_flush(struct dvb_ringbuffer *rbuf) { - rbuf->pread = rbuf->pwrite; + /* dvb_ringbuffer_flush() counts as read operation + * smp_load_acquire() to load write pointer + * smp_store_release() to update read pointer, this ensures that the + * correct pointer is visible for subsequent dvb_ringbuffer_free() + * calls on other cpu cores + */ + smp_store_release(&rbuf->pread, smp_load_acquire(&rbuf->pwrite)); rbuf->error = 0; } EXPORT_SYMBOL(dvb_ringbuffer_flush); void dvb_ringbuffer_reset(struct dvb_ringbuffer *rbuf) { - rbuf->pread = rbuf->pwrite = 0; + /* dvb_ringbuffer_reset() counts 
as read and write operation + * smp_store_release() to update read pointer + */ + smp_store_release(&rbuf->pread, 0); + /* smp_store_release() to update write pointer */ + smp_store_release(&rbuf->pwrite, 0); rbuf->error = 0; } @@ -119,12 +145,17 @@ ssize_t dvb_ringbuffer_read_user(struct dvb_ringbuffer *rbuf, u8 __user *buf, si return -EFAULT; buf += split; todo -= split; - rbuf->pread = 0; + /* smp_store_release() for read pointer update to ensure + * that buf is not overwritten until read is complete, + * this pairs with ACCESS_ONCE() in dvb_ringbuffer_free() + */ + smp_store_release(&rbuf->pread, 0); } if (copy_to_user(buf, rbuf->data+rbuf->pread, todo)) return -EFAULT; - rbuf->pread = (rbuf->pread + todo) % rbuf->size; + /* smp_store_release() to update read pointer, see above */ + smp_store_release(&rbuf->pread, (rbuf->pread + todo) % rbuf->size); return len; } @@ -139,11 +170,16 @@ void dvb_ringbuffer_read(struct dvb_ringbuffer *rbuf, u8 *buf, size_t len) memcpy(buf, rbuf->data+rbuf->pread, split); buf += split; todo -= split; - rbuf->pread = 0; + /* smp_store_release() for read pointer update to ensure + * that buf is not overwritten until read is complete, + * this pairs with ACCESS_ONCE() in dvb_ringbuffer_free() + */ + smp_store_release(&rbuf->pread, 0); } memcpy(buf, rbuf->data+rbuf->pread, todo); - rbuf->pread = (rbuf->pread + todo) % rbuf->size; + /* smp_store_release() to update read pointer, see above */ + smp_store_release(&rbuf->pread, (rbuf->pread + todo) % rbuf->size); } @@ -158,10 +194,16 @@ ssize_t dvb_ringbuffer_write(struct dvb_ringbuffer *rbuf, const u8 *buf, size_t memcpy(rbuf->data+rbuf->pwrite, buf, split); buf += split; todo -= split; - rbuf->pwrite = 0; + /* smp_store_release() for write pointer update to ensure that + * written data is visible on other cpu cores before the pointer + * update, this pairs with smp_load_acquire() in + * dvb_ringbuffer_empty() or dvb_ringbuffer_avail() + */ + smp_store_release(&rbuf->pwrite, 0); } memcpy(rbuf->data+rbuf->pwrite, buf, todo); - rbuf->pwrite = (rbuf->pwrite + todo) % rbuf->size; + /* smp_store_release() for write pointer update, see above */ + smp_store_release(&rbuf->pwrite, (rbuf->pwrite + todo) % rbuf->size); return len; } @@ -181,12 +223,18 @@ ssize_t dvb_ringbuffer_write_user(struct dvb_ringbuffer *rbuf, return len - todo; buf += split; todo -= split; - rbuf->pwrite = 0; + /* smp_store_release() for write pointer update to ensure that + * written data is visible on other cpu cores before the pointer + * update, this pairs with smp_load_acquire() in + * dvb_ringbuffer_empty() or dvb_ringbuffer_avail() + */ + smp_store_release(&rbuf->pwrite, 0); } status = copy_from_user(rbuf->data+rbuf->pwrite, buf, todo); if (status) return len - todo; - rbuf->pwrite = (rbuf->pwrite + todo) % rbuf->size; + /* smp_store_release() for write pointer update, see above */ + smp_store_release(&rbuf->pwrite, (rbuf->pwrite + todo) % rbuf->size); return len; } diff --git a/drivers/media/dvb-frontends/Kconfig b/drivers/media/dvb-frontends/Kconfig index a82f77c49bd5..c645aa81f423 100644 --- a/drivers/media/dvb-frontends/Kconfig +++ b/drivers/media/dvb-frontends/Kconfig @@ -73,6 +73,14 @@ config DVB_SI2165 Say Y when you want to support this frontend. +config DVB_MN88472 + tristate "Panasonic MN88472" + depends on DVB_CORE && I2C + select REGMAP_I2C + default m if !MEDIA_SUBDRV_AUTOSELECT + help + Say Y when you want to support this frontend. 
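The dvb_ringbuffer conversion above is the single-producer/single-consumer discipline from Documentation/circular-buffers.txt: the writer publishes data with a release store of the write index, the reader observes it with an acquire load before touching the bytes, and the read index is paired the same way so a slot is never reused before the reader is done with it. A compact C11 userspace sketch of that pairing (hypothetical ring type, with a slightly stronger acquire on the writer's read-index load than the kernel's ACCESS_ONCE(); not the dvb_ringbuffer API):

#include <stdatomic.h>
#include <stdint.h>

#define RING_SIZE 256                   /* power of two, one slot left empty */

struct ring {
        uint8_t data[RING_SIZE];
        atomic_size_t pread;            /* advanced only by the reader */
        atomic_size_t pwrite;           /* advanced only by the writer */
};

/* Writer: store the byte first, then publish it with a release store
 * so the data is visible before the new write index. */
static int ring_put(struct ring *r, uint8_t b)
{
        size_t w = atomic_load_explicit(&r->pwrite, memory_order_relaxed);
        size_t rd = atomic_load_explicit(&r->pread, memory_order_acquire);

        if (((w + 1) % RING_SIZE) == rd)
                return -1;              /* full */
        r->data[w] = b;
        atomic_store_explicit(&r->pwrite, (w + 1) % RING_SIZE,
                              memory_order_release);
        return 0;
}

/* Reader: acquire-load the write index before reading the data it
 * covers, then release the slot by advancing the read index. */
static int ring_get(struct ring *r, uint8_t *b)
{
        size_t rd = atomic_load_explicit(&r->pread, memory_order_relaxed);
        size_t w = atomic_load_explicit(&r->pwrite, memory_order_acquire);

        if (rd == w)
                return -1;              /* empty */
        *b = r->data[rd];
        atomic_store_explicit(&r->pread, (rd + 1) % RING_SIZE,
                              memory_order_release);
        return 0;
}

int main(void)
{
        static struct ring r;           /* zero-initialized: empty */
        uint8_t b;

        ring_put(&r, 0x47);             /* writer side (TS sync byte) */
        return ring_get(&r, &b) || b != 0x47;
}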
+ config DVB_MN88473 tristate "Panasonic MN88473" depends on DVB_CORE && I2C @@ -853,6 +861,13 @@ config DVB_ASCOT2E help Say Y when you want to support this frontend. +config DVB_HELENE + tristate "Sony HELENE Sat/Ter tuner (CXD2858ER)" + depends on DVB_CORE && I2C + default m if !MEDIA_SUBDRV_AUTOSELECT + help + Say Y when you want to support this frontend. + comment "Tools to develop new frontends" config DVB_DUMMY_FE diff --git a/drivers/media/dvb-frontends/Makefile b/drivers/media/dvb-frontends/Makefile index eb7191f4219d..e90165ad361b 100644 --- a/drivers/media/dvb-frontends/Makefile +++ b/drivers/media/dvb-frontends/Makefile @@ -95,6 +95,7 @@ obj-$(CONFIG_DVB_STV0900) += stv0900.o obj-$(CONFIG_DVB_STV090x) += stv090x.o obj-$(CONFIG_DVB_STV6110x) += stv6110x.o obj-$(CONFIG_DVB_M88DS3103) += m88ds3103.o +obj-$(CONFIG_DVB_MN88472) += mn88472.o obj-$(CONFIG_DVB_MN88473) += mn88473.o obj-$(CONFIG_DVB_ISL6423) += isl6423.o obj-$(CONFIG_DVB_EC100) += ec100.o @@ -123,3 +124,4 @@ obj-$(CONFIG_DVB_AS102_FE) += as102_fe.o obj-$(CONFIG_DVB_TC90522) += tc90522.o obj-$(CONFIG_DVB_HORUS3A) += horus3a.o obj-$(CONFIG_DVB_ASCOT2E) += ascot2e.o +obj-$(CONFIG_DVB_HELENE) += helene.o diff --git a/drivers/media/dvb-frontends/af9033.c b/drivers/media/dvb-frontends/af9033.c index efebe5ce2429..9a8157a5f49d 100644 --- a/drivers/media/dvb-frontends/af9033.c +++ b/drivers/media/dvb-frontends/af9033.c @@ -41,7 +41,6 @@ struct af9033_dev { u64 post_bit_count; u64 error_block_count; u64 total_block_count; - struct delayed_work stat_work; }; /* write multiple registers */ @@ -468,8 +467,6 @@ static int af9033_init(struct dvb_frontend *fe) c->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->post_bit_error.len = 1; c->post_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; - /* start statistics polling */ - schedule_delayed_work(&dev->stat_work, msecs_to_jiffies(2000)); return 0; @@ -485,9 +482,6 @@ static int af9033_sleep(struct dvb_frontend *fe) int ret, i; u8 tmp; - /* stop statistics polling */ - cancel_delayed_work_sync(&dev->stat_work); - ret = af9033_wr_reg(dev, 0x80004c, 1); if (ret < 0) goto err; @@ -821,36 +815,39 @@ err: static int af9033_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct af9033_dev *dev = fe->demodulator_priv; - int ret; - u8 tmp; + struct dtv_frontend_properties *c = &fe->dtv_property_cache; + int ret, i, tmp = 0; + u8 u8tmp, buf[7]; + + dev_dbg(&dev->client->dev, "\n"); *status = 0; /* radio channel status, 0=no result, 1=has signal, 2=no signal */ - ret = af9033_rd_reg(dev, 0x800047, &tmp); + ret = af9033_rd_reg(dev, 0x800047, &u8tmp); if (ret < 0) goto err; /* has signal */ - if (tmp == 0x01) + if (u8tmp == 0x01) *status |= FE_HAS_SIGNAL; - if (tmp != 0x02) { + if (u8tmp != 0x02) { /* TPS lock */ - ret = af9033_rd_reg_mask(dev, 0x80f5a9, &tmp, 0x01); + ret = af9033_rd_reg_mask(dev, 0x80f5a9, &u8tmp, 0x01); if (ret < 0) goto err; - if (tmp) + if (u8tmp) *status |= FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_VITERBI; /* full lock */ - ret = af9033_rd_reg_mask(dev, 0x80f999, &tmp, 0x01); + ret = af9033_rd_reg_mask(dev, 0x80f999, &u8tmp, 0x01); if (ret < 0) goto err; - if (tmp) + if (u8tmp) *status |= FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_LOCK; @@ -858,6 +855,148 @@ static int af9033_read_status(struct dvb_frontend *fe, enum fe_status *status) dev->fe_status = *status; + /* signal strength */ + if (dev->fe_status & FE_HAS_SIGNAL) { + if (dev->is_af9035) { + ret = af9033_rd_reg(dev, 0x80004a, &u8tmp); + if (ret) + goto err; + tmp = 
-u8tmp * 1000; + } else { + ret = af9033_rd_reg(dev, 0x8000f7, &u8tmp); + if (ret) + goto err; + tmp = (u8tmp - 100) * 1000; + } + + c->strength.len = 1; + c->strength.stat[0].scale = FE_SCALE_DECIBEL; + c->strength.stat[0].svalue = tmp; + } else { + c->strength.len = 1; + c->strength.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + } + + /* CNR */ + if (dev->fe_status & FE_HAS_VITERBI) { + u32 snr_val, snr_lut_size; + const struct val_snr *snr_lut = NULL; + + /* read value */ + ret = af9033_rd_regs(dev, 0x80002c, buf, 3); + if (ret) + goto err; + + snr_val = (buf[2] << 16) | (buf[1] << 8) | (buf[0] << 0); + + /* read superframe number */ + ret = af9033_rd_reg(dev, 0x80f78b, &u8tmp); + if (ret) + goto err; + + if (u8tmp) + snr_val /= u8tmp; + + /* read current transmission mode */ + ret = af9033_rd_reg(dev, 0x80f900, &u8tmp); + if (ret) + goto err; + + switch ((u8tmp >> 0) & 3) { + case 0: + snr_val *= 4; + break; + case 1: + snr_val *= 1; + break; + case 2: + snr_val *= 2; + break; + default: + snr_val *= 0; + break; + } + + /* read current modulation */ + ret = af9033_rd_reg(dev, 0x80f903, &u8tmp); + if (ret) + goto err; + + switch ((u8tmp >> 0) & 3) { + case 0: + snr_lut_size = ARRAY_SIZE(qpsk_snr_lut); + snr_lut = qpsk_snr_lut; + break; + case 1: + snr_lut_size = ARRAY_SIZE(qam16_snr_lut); + snr_lut = qam16_snr_lut; + break; + case 2: + snr_lut_size = ARRAY_SIZE(qam64_snr_lut); + snr_lut = qam64_snr_lut; + break; + default: + snr_lut_size = 0; + tmp = 0; + break; + } + + for (i = 0; i < snr_lut_size; i++) { + tmp = snr_lut[i].snr * 1000; + if (snr_val < snr_lut[i].val) + break; + } + + c->cnr.len = 1; + c->cnr.stat[0].scale = FE_SCALE_DECIBEL; + c->cnr.stat[0].svalue = tmp; + } else { + c->cnr.len = 1; + c->cnr.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + } + + /* UCB/PER/BER */ + if (dev->fe_status & FE_HAS_LOCK) { + /* outer FEC, 204 byte packets */ + u16 abort_packet_count, rsd_packet_count; + /* inner FEC, bits */ + u32 rsd_bit_err_count; + + /* + * Packet count used for measurement is 10000 + * (rsd_packet_count). Maybe it should be increased? 
+ */ + + ret = af9033_rd_regs(dev, 0x800032, buf, 7); + if (ret) + goto err; + + abort_packet_count = (buf[1] << 8) | (buf[0] << 0); + rsd_bit_err_count = (buf[4] << 16) | (buf[3] << 8) | buf[2]; + rsd_packet_count = (buf[6] << 8) | (buf[5] << 0); + + dev->error_block_count += abort_packet_count; + dev->total_block_count += rsd_packet_count; + dev->post_bit_error += rsd_bit_err_count; + dev->post_bit_count += rsd_packet_count * 204 * 8; + + c->block_count.len = 1; + c->block_count.stat[0].scale = FE_SCALE_COUNTER; + c->block_count.stat[0].uvalue = dev->total_block_count; + + c->block_error.len = 1; + c->block_error.stat[0].scale = FE_SCALE_COUNTER; + c->block_error.stat[0].uvalue = dev->error_block_count; + + c->post_bit_count.len = 1; + c->post_bit_count.stat[0].scale = FE_SCALE_COUNTER; + c->post_bit_count.stat[0].uvalue = dev->post_bit_count; + + c->post_bit_error.len = 1; + c->post_bit_error.stat[0].scale = FE_SCALE_COUNTER; + c->post_bit_error.stat[0].uvalue = dev->post_bit_error; + } + return 0; err: @@ -1059,159 +1198,6 @@ err: return ret; } -static void af9033_stat_work(struct work_struct *work) -{ - struct af9033_dev *dev = container_of(work, struct af9033_dev, stat_work.work); - struct dtv_frontend_properties *c = &dev->fe.dtv_property_cache; - int ret, tmp, i, len; - u8 u8tmp, buf[7]; - - dev_dbg(&dev->client->dev, "\n"); - - /* signal strength */ - if (dev->fe_status & FE_HAS_SIGNAL) { - if (dev->is_af9035) { - ret = af9033_rd_reg(dev, 0x80004a, &u8tmp); - tmp = -u8tmp * 1000; - } else { - ret = af9033_rd_reg(dev, 0x8000f7, &u8tmp); - tmp = (u8tmp - 100) * 1000; - } - if (ret) - goto err; - - c->strength.len = 1; - c->strength.stat[0].scale = FE_SCALE_DECIBEL; - c->strength.stat[0].svalue = tmp; - } else { - c->strength.len = 1; - c->strength.stat[0].scale = FE_SCALE_NOT_AVAILABLE; - } - - /* CNR */ - if (dev->fe_status & FE_HAS_VITERBI) { - u32 snr_val; - const struct val_snr *snr_lut; - - /* read value */ - ret = af9033_rd_regs(dev, 0x80002c, buf, 3); - if (ret) - goto err; - - snr_val = (buf[2] << 16) | (buf[1] << 8) | (buf[0] << 0); - - /* read superframe number */ - ret = af9033_rd_reg(dev, 0x80f78b, &u8tmp); - if (ret) - goto err; - - if (u8tmp) - snr_val /= u8tmp; - - /* read current transmission mode */ - ret = af9033_rd_reg(dev, 0x80f900, &u8tmp); - if (ret) - goto err; - - switch ((u8tmp >> 0) & 3) { - case 0: - snr_val *= 4; - break; - case 1: - snr_val *= 1; - break; - case 2: - snr_val *= 2; - break; - default: - goto err_schedule_delayed_work; - } - - /* read current modulation */ - ret = af9033_rd_reg(dev, 0x80f903, &u8tmp); - if (ret) - goto err; - - switch ((u8tmp >> 0) & 3) { - case 0: - len = ARRAY_SIZE(qpsk_snr_lut); - snr_lut = qpsk_snr_lut; - break; - case 1: - len = ARRAY_SIZE(qam16_snr_lut); - snr_lut = qam16_snr_lut; - break; - case 2: - len = ARRAY_SIZE(qam64_snr_lut); - snr_lut = qam64_snr_lut; - break; - default: - goto err_schedule_delayed_work; - } - - for (i = 0; i < len; i++) { - tmp = snr_lut[i].snr * 1000; - if (snr_val < snr_lut[i].val) - break; - } - - c->cnr.len = 1; - c->cnr.stat[0].scale = FE_SCALE_DECIBEL; - c->cnr.stat[0].svalue = tmp; - } else { - c->cnr.len = 1; - c->cnr.stat[0].scale = FE_SCALE_NOT_AVAILABLE; - } - - /* UCB/PER/BER */ - if (dev->fe_status & FE_HAS_LOCK) { - /* outer FEC, 204 byte packets */ - u16 abort_packet_count, rsd_packet_count; - /* inner FEC, bits */ - u32 rsd_bit_err_count; - - /* - * Packet count used for measurement is 10000 - * (rsd_packet_count). Maybe it should be increased? 
- */ - - ret = af9033_rd_regs(dev, 0x800032, buf, 7); - if (ret) - goto err; - - abort_packet_count = (buf[1] << 8) | (buf[0] << 0); - rsd_bit_err_count = (buf[4] << 16) | (buf[3] << 8) | buf[2]; - rsd_packet_count = (buf[6] << 8) | (buf[5] << 0); - - dev->error_block_count += abort_packet_count; - dev->total_block_count += rsd_packet_count; - dev->post_bit_error += rsd_bit_err_count; - dev->post_bit_count += rsd_packet_count * 204 * 8; - - c->block_count.len = 1; - c->block_count.stat[0].scale = FE_SCALE_COUNTER; - c->block_count.stat[0].uvalue = dev->total_block_count; - - c->block_error.len = 1; - c->block_error.stat[0].scale = FE_SCALE_COUNTER; - c->block_error.stat[0].uvalue = dev->error_block_count; - - c->post_bit_count.len = 1; - c->post_bit_count.stat[0].scale = FE_SCALE_COUNTER; - c->post_bit_count.stat[0].uvalue = dev->post_bit_count; - - c->post_bit_error.len = 1; - c->post_bit_error.stat[0].scale = FE_SCALE_COUNTER; - c->post_bit_error.stat[0].uvalue = dev->post_bit_error; - } - -err_schedule_delayed_work: - schedule_delayed_work(&dev->stat_work, msecs_to_jiffies(2000)); - return; -err: - dev_dbg(&dev->client->dev, "failed=%d\n", ret); -} - static struct dvb_frontend_ops af9033_ops = { .delsys = { SYS_DVBT }, .info = { @@ -1272,7 +1258,6 @@ static int af9033_probe(struct i2c_client *client, /* setup the state */ dev->client = client; - INIT_DELAYED_WORK(&dev->stat_work, af9033_stat_work); memcpy(&dev->cfg, cfg, sizeof(struct af9033_config)); if (dev->cfg.clock != 12000000) { @@ -1372,9 +1357,6 @@ static int af9033_remove(struct i2c_client *client) dev_dbg(&dev->client->dev, "\n"); - /* stop statistics polling */ - cancel_delayed_work_sync(&dev->stat_work); - dev->fe.ops.release = NULL; dev->fe.demodulator_priv = NULL; kfree(dev); @@ -1391,6 +1373,7 @@ MODULE_DEVICE_TABLE(i2c, af9033_id_table); static struct i2c_driver af9033_driver = { .driver = { .name = "af9033", + .suppress_bind_attrs = true, }, .probe = af9033_probe, .remove = af9033_remove, diff --git a/drivers/media/dvb-frontends/ascot2e.c b/drivers/media/dvb-frontends/ascot2e.c index f770f6a2c987..8cc8c4597b6a 100644 --- a/drivers/media/dvb-frontends/ascot2e.c +++ b/drivers/media/dvb-frontends/ascot2e.c @@ -132,7 +132,7 @@ static int ascot2e_write_regs(struct ascot2e_priv *priv, } }; - if (len + 1 >= sizeof(buf)) { + if (len + 1 > sizeof(buf)) { dev_warn(&priv->i2c->dev,"wr reg=%04x: len=%d is too big!\n", reg, len + 1); return -E2BIG; diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c index 900186ba8e62..09c39346167f 100644 --- a/drivers/media/dvb-frontends/cxd2841er.c +++ b/drivers/media/dvb-frontends/cxd2841er.c @@ -1,7 +1,9 @@ /* * cxd2841er.c * - * Sony CXD2441ER digital demodulator driver + * Sony digital demodulator driver for + * CXD2841ER - DVB-S/S2/T/T2/C/C2 + * CXD2854ER - DVB-S/S2/T/T2/C/C2, ISDB-T/S * * Copyright 2012 Sony Corporation * Copyright (C) 2014 NetUP Inc. 
@@ -34,6 +36,16 @@ #include "cxd2841er_priv.h" #define MAX_WRITE_REGSIZE 16 +#define LOG2_E_100X 144 + +/* DVB-C constellation */ +enum sony_dvbc_constellation_t { + SONY_DVBC_CONSTELLATION_16QAM, + SONY_DVBC_CONSTELLATION_32QAM, + SONY_DVBC_CONSTELLATION_64QAM, + SONY_DVBC_CONSTELLATION_128QAM, + SONY_DVBC_CONSTELLATION_256QAM +}; enum cxd2841er_state { STATE_SHUTDOWN = 0, @@ -51,6 +63,8 @@ struct cxd2841er_priv { const struct cxd2841er_config *config; enum cxd2841er_state state; u8 system; + enum cxd2841er_xtal xtal; + enum fe_caps caps; }; static const struct cxd2841er_cnr_data s_cn_data[] = { @@ -188,6 +202,9 @@ static const struct cxd2841er_cnr_data s2_cn_data[] = { }; #define MAKE_IFFREQ_CONFIG(iffreq) ((u32)(((iffreq)/41.0)*16777216.0 + 0.5)) +#define MAKE_IFFREQ_CONFIG_XTAL(xtal, iffreq) ((xtal == SONY_XTAL_24000) ? \ + (u32)(((iffreq)/48.0)*16777216.0 + 0.5) : \ + (u32)(((iffreq)/41.0)*16777216.0 + 0.5)) static void cxd2841er_i2c_debug(struct cxd2841er_priv *priv, u8 addr, u8 reg, u8 write, @@ -217,7 +234,7 @@ static int cxd2841er_write_regs(struct cxd2841er_priv *priv, }; if (len + 1 >= sizeof(buf)) { - dev_warn(&priv->i2c->dev,"wr reg=%04x: len=%d is too big!\n", + dev_warn(&priv->i2c->dev, "wr reg=%04x: len=%d is too big!\n", reg, len + 1); return -E2BIG; } @@ -282,6 +299,7 @@ static int cxd2841er_read_regs(struct cxd2841er_priv *priv, KBUILD_MODNAME, ret, i2c_addr, reg); return ret; } + cxd2841er_i2c_debug(priv, i2c_addr, reg, 0, val, len); return 0; } @@ -427,6 +445,15 @@ static int cxd2841er_sleep_tc_to_active_t2_band(struct cxd2841er_priv *priv, static int cxd2841er_sleep_tc_to_active_c_band(struct cxd2841er_priv *priv, u32 bandwidth); +static int cxd2841er_sleep_tc_to_active_i(struct cxd2841er_priv *priv, + u32 bandwidth); + +static int cxd2841er_active_i_to_sleep_tc(struct cxd2841er_priv *priv); + +static int cxd2841er_sleep_tc_to_shutdown(struct cxd2841er_priv *priv); + +static int cxd2841er_shutdown_to_sleep_tc(struct cxd2841er_priv *priv); + static int cxd2841er_retune_active(struct cxd2841er_priv *priv, struct dtv_frontend_properties *p) { @@ -454,7 +481,13 @@ static int cxd2841er_retune_active(struct cxd2841er_priv *priv, priv, p->bandwidth_hz); case SYS_DVBC_ANNEX_A: return cxd2841er_sleep_tc_to_active_c_band( - priv, 8000000); + priv, p->bandwidth_hz); + case SYS_ISDBT: + cxd2841er_active_i_to_sleep_tc(priv); + cxd2841er_sleep_tc_to_shutdown(priv); + cxd2841er_shutdown_to_sleep_tc(priv); + return cxd2841er_sleep_tc_to_active_i( + priv, p->bandwidth_hz); } } dev_dbg(&priv->i2c->dev, "%s(): invalid delivery system %d\n", @@ -669,6 +702,45 @@ static int cxd2841er_active_c_to_sleep_tc(struct cxd2841er_priv *priv) return 0; } +static int cxd2841er_active_i_to_sleep_tc(struct cxd2841er_priv *priv) +{ + dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + if (priv->state != STATE_ACTIVE_TC) { + dev_err(&priv->i2c->dev, "%s(): invalid state %d\n", + __func__, priv->state); + return -EINVAL; + } + /* Set SLV-T Bank : 0x00 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x00); + /* disable TS output */ + cxd2841er_write_reg(priv, I2C_SLVT, 0xc3, 0x01); + /* enable Hi-Z setting 1 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x80, 0x3f); + /* enable Hi-Z setting 2 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x81, 0xff); + + /* TODO: Cancel demod parameter */ + + /* Set SLV-X Bank : 0x00 */ + cxd2841er_write_reg(priv, I2C_SLVX, 0x00, 0x00); + /* disable ADC 1 */ + cxd2841er_write_reg(priv, I2C_SLVX, 0x18, 0x01); + /* Set SLV-T Bank : 0x00 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 
0x00); + /* Disable ADC 2 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x43, 0x0a); + /* Disable ADC 3 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x41, 0x0a); + /* Disable ADC clock */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x30, 0x00); + /* Disable RF level monitor */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x2f, 0x00); + /* Disable demod clock */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x2c, 0x00); + priv->state = STATE_SLEEP_TC; + return 0; +} + static int cxd2841er_shutdown_to_sleep_s(struct cxd2841er_priv *priv) { dev_dbg(&priv->i2c->dev, "%s()\n", __func__); @@ -686,8 +758,25 @@ static int cxd2841er_shutdown_to_sleep_s(struct cxd2841er_priv *priv) cxd2841er_write_reg(priv, I2C_SLVX, 0x00, 0x00); /* Set demod SW reset */ cxd2841er_write_reg(priv, I2C_SLVX, 0x10, 0x01); - /* Set X'tal clock to 20.5Mhz */ - cxd2841er_write_reg(priv, I2C_SLVX, 0x14, 0x00); + + switch (priv->xtal) { + case SONY_XTAL_20500: + cxd2841er_write_reg(priv, I2C_SLVX, 0x14, 0x00); + break; + case SONY_XTAL_24000: + /* Select demod frequency */ + cxd2841er_write_reg(priv, I2C_SLVX, 0x12, 0x00); + cxd2841er_write_reg(priv, I2C_SLVX, 0x14, 0x03); + break; + case SONY_XTAL_41000: + cxd2841er_write_reg(priv, I2C_SLVX, 0x14, 0x01); + break; + default: + dev_dbg(&priv->i2c->dev, "%s(): invalid demod xtal %d\n", + __func__, priv->xtal); + return -EINVAL; + } + /* Set demod mode */ cxd2841er_write_reg(priv, I2C_SLVX, 0x17, 0x0a); /* Clear demod SW reset */ @@ -712,6 +801,8 @@ static int cxd2841er_shutdown_to_sleep_s(struct cxd2841er_priv *priv) static int cxd2841er_shutdown_to_sleep_tc(struct cxd2841er_priv *priv) { + u8 data = 0; + dev_dbg(&priv->i2c->dev, "%s()\n", __func__); if (priv->state != STATE_SHUTDOWN) { dev_dbg(&priv->i2c->dev, "%s(): invalid demod state %d\n", @@ -727,9 +818,24 @@ static int cxd2841er_shutdown_to_sleep_tc(struct cxd2841er_priv *priv) cxd2841er_write_reg(priv, I2C_SLVX, 0x00, 0x00); /* Set demod SW reset */ cxd2841er_write_reg(priv, I2C_SLVX, 0x10, 0x01); - /* Set X'tal clock to 20.5Mhz */ + /* Select ADC clock mode */ cxd2841er_write_reg(priv, I2C_SLVX, 0x13, 0x00); - cxd2841er_write_reg(priv, I2C_SLVX, 0x14, 0x00); + + switch (priv->xtal) { + case SONY_XTAL_20500: + data = 0x0; + break; + case SONY_XTAL_24000: + /* Select demod frequency */ + cxd2841er_write_reg(priv, I2C_SLVX, 0x12, 0x00); + data = 0x3; + break; + case SONY_XTAL_41000: + cxd2841er_write_reg(priv, I2C_SLVX, 0x12, 0x00); + data = 0x1; + break; + } + cxd2841er_write_reg(priv, I2C_SLVX, 0x14, data); /* Clear demod SW reset */ cxd2841er_write_reg(priv, I2C_SLVX, 0x10, 0x00); usleep_range(1000, 2000); @@ -809,11 +915,14 @@ static void cxd2841er_set_ts_clock_mode(struct cxd2841er_priv *priv, static u8 cxd2841er_chip_id(struct cxd2841er_priv *priv) { - u8 chip_id; + u8 chip_id = 0; dev_dbg(&priv->i2c->dev, "%s()\n", __func__); - cxd2841er_write_reg(priv, I2C_SLVT, 0, 0); - cxd2841er_read_reg(priv, I2C_SLVT, 0xfd, &chip_id); + if (cxd2841er_write_reg(priv, I2C_SLVT, 0, 0) == 0) + cxd2841er_read_reg(priv, I2C_SLVT, 0xfd, &chip_id); + else if (cxd2841er_write_reg(priv, I2C_SLVX, 0, 0) == 0) + cxd2841er_read_reg(priv, I2C_SLVX, 0xfd, &chip_id); + return chip_id; } @@ -896,6 +1005,25 @@ static int cxd2841er_read_status_c(struct cxd2841er_priv *priv, u8 *tslock) return 0; } +static int cxd2841er_read_status_i(struct cxd2841er_priv *priv, + u8 *sync, u8 *tslock, u8 *unlock) +{ + u8 data = 0; + + dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + if (priv->state != STATE_ACTIVE_TC) + return -EINVAL; + /* Set SLV-T Bank : 0x60 */ + 
cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x60); + cxd2841er_read_reg(priv, I2C_SLVT, 0x10, &data); + dev_dbg(&priv->i2c->dev, + "%s(): lock=0x%x\n", __func__, data); + *sync = ((data & 0x02) ? 1 : 0); + *tslock = ((data & 0x01) ? 1 : 0); + *unlock = ((data & 0x10) ? 1 : 0); + return 0; +} + static int cxd2841er_read_status_tc(struct dvb_frontend *fe, enum fe_status *status) { @@ -921,6 +1049,20 @@ static int cxd2841er_read_status_tc(struct dvb_frontend *fe, FE_HAS_SYNC; if (tslock) *status |= FE_HAS_LOCK; + } else if (priv->system == SYS_ISDBT) { + ret = cxd2841er_read_status_i( + priv, &sync, &tslock, &unlock); + if (ret) + goto done; + if (unlock) + goto done; + if (sync) + *status = FE_HAS_SIGNAL | + FE_HAS_CARRIER | + FE_HAS_VITERBI | + FE_HAS_SYNC; + if (tslock) + *status |= FE_HAS_LOCK; } else if (priv->system == SYS_DVBC_ANNEX_A) { ret = cxd2841er_read_status_c(priv, &tslock); if (ret) @@ -997,6 +1139,76 @@ static int cxd2841er_get_carrier_offset_s_s2(struct cxd2841er_priv *priv, return 0; } +static int cxd2841er_get_carrier_offset_i(struct cxd2841er_priv *priv, + u32 bandwidth, int *offset) +{ + u8 data[4]; + + dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + if (priv->state != STATE_ACTIVE_TC) { + dev_dbg(&priv->i2c->dev, "%s(): invalid state %d\n", + __func__, priv->state); + return -EINVAL; + } + if (priv->system != SYS_ISDBT) { + dev_dbg(&priv->i2c->dev, "%s(): invalid delivery system %d\n", + __func__, priv->system); + return -EINVAL; + } + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x60); + cxd2841er_read_regs(priv, I2C_SLVT, 0x4c, data, sizeof(data)); + *offset = -1 * sign_extend32( + ((u32)(data[0] & 0x1F) << 24) | ((u32)data[1] << 16) | + ((u32)data[2] << 8) | (u32)data[3], 29); + + switch (bandwidth) { + case 6000000: + *offset = -1 * ((*offset) * 8/264); + break; + case 7000000: + *offset = -1 * ((*offset) * 8/231); + break; + case 8000000: + *offset = -1 * ((*offset) * 8/198); + break; + default: + dev_dbg(&priv->i2c->dev, "%s(): invalid bandwidth %d\n", + __func__, bandwidth); + return -EINVAL; + } + + dev_dbg(&priv->i2c->dev, "%s(): bandwidth %d offset %d\n", + __func__, bandwidth, *offset); + + return 0; +} + +static int cxd2841er_get_carrier_offset_t(struct cxd2841er_priv *priv, + u32 bandwidth, int *offset) +{ + u8 data[4]; + + dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + if (priv->state != STATE_ACTIVE_TC) { + dev_dbg(&priv->i2c->dev, "%s(): invalid state %d\n", + __func__, priv->state); + return -EINVAL; + } + if (priv->system != SYS_DVBT) { + dev_dbg(&priv->i2c->dev, "%s(): invalid delivery system %d\n", + __func__, priv->system); + return -EINVAL; + } + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); + cxd2841er_read_regs(priv, I2C_SLVT, 0x4c, data, sizeof(data)); + *offset = -1 * sign_extend32( + ((u32)(data[0] & 0x1F) << 24) | ((u32)data[1] << 16) | + ((u32)data[2] << 8) | (u32)data[3], 29); + *offset *= (bandwidth / 1000000); + *offset /= 235; + return 0; +} + static int cxd2841er_get_carrier_offset_t2(struct cxd2841er_priv *priv, u32 bandwidth, int *offset) { @@ -1060,6 +1272,24 @@ static int cxd2841er_get_carrier_offset_c(struct cxd2841er_priv *priv, return 0; } +static int cxd2841er_read_packet_errors_c( + struct cxd2841er_priv *priv, u32 *penum) +{ + u8 data[3]; + + *penum = 0; + if (priv->state != STATE_ACTIVE_TC) { + dev_dbg(&priv->i2c->dev, "%s(): invalid state %d\n", + __func__, priv->state); + return -EINVAL; + } + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x40); + cxd2841er_read_regs(priv, I2C_SLVT, 0xea, data, sizeof(data)); + if (data[2] & 
0x01) + *penum = ((u32)data[0] << 8) | (u32)data[1]; + return 0; +} + static int cxd2841er_read_packet_errors_t( struct cxd2841er_priv *priv, u32 *penum) { @@ -1096,11 +1326,85 @@ static int cxd2841er_read_packet_errors_t2( return 0; } -static u32 cxd2841er_mon_read_ber_s(struct cxd2841er_priv *priv) +static int cxd2841er_read_packet_errors_i( + struct cxd2841er_priv *priv, u32 *penum) +{ + u8 data[2]; + + *penum = 0; + if (priv->state != STATE_ACTIVE_TC) { + dev_dbg(&priv->i2c->dev, "%s(): invalid state %d\n", + __func__, priv->state); + return -EINVAL; + } + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x60); + cxd2841er_read_regs(priv, I2C_SLVT, 0xA1, data, 1); + + if (!(data[0] & 0x01)) + return 0; + + /* Layer A */ + cxd2841er_read_regs(priv, I2C_SLVT, 0xA2, data, sizeof(data)); + *penum = ((u32)data[0] << 8) | (u32)data[1]; + + /* Layer B */ + cxd2841er_read_regs(priv, I2C_SLVT, 0xA4, data, sizeof(data)); + *penum += ((u32)data[0] << 8) | (u32)data[1]; + + /* Layer C */ + cxd2841er_read_regs(priv, I2C_SLVT, 0xA6, data, sizeof(data)); + *penum += ((u32)data[0] << 8) | (u32)data[1]; + + return 0; +} + +static int cxd2841er_read_ber_c(struct cxd2841er_priv *priv, + u32 *bit_error, u32 *bit_count) +{ + u8 data[3]; + u32 bit_err, period_exp; + + dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + if (priv->state != STATE_ACTIVE_TC) { + dev_dbg(&priv->i2c->dev, "%s(): invalid state %d\n", + __func__, priv->state); + return -EINVAL; + } + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x40); + cxd2841er_read_regs(priv, I2C_SLVT, 0x62, data, sizeof(data)); + if (!(data[0] & 0x80)) { + dev_dbg(&priv->i2c->dev, + "%s(): no valid BER data\n", __func__); + return -EINVAL; + } + bit_err = ((u32)(data[0] & 0x3f) << 16) | + ((u32)data[1] << 8) | + (u32)data[2]; + cxd2841er_read_reg(priv, I2C_SLVT, 0x60, data); + period_exp = data[0] & 0x1f; + + if ((period_exp <= 11) && (bit_err > (1 << period_exp) * 204 * 8)) { + dev_dbg(&priv->i2c->dev, + "%s(): period_exp(%u) or bit_err(%u) not in range. 
no valid BER data\n", + __func__, period_exp, bit_err); + return -EINVAL; + } + + dev_dbg(&priv->i2c->dev, + "%s(): period_exp(%u) or bit_err(%u) count=%d\n", + __func__, period_exp, bit_err, + ((1 << period_exp) * 204 * 8)); + + *bit_error = bit_err; + *bit_count = ((1 << period_exp) * 204 * 8); + + return 0; +} + +static int cxd2841er_mon_read_ber_s(struct cxd2841er_priv *priv, + u32 *bit_error, u32 *bit_count) { u8 data[11]; - u32 bit_error, bit_count; - u32 temp_q, temp_r; /* Set SLV-T Bank : 0xA0 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0xa0); @@ -1116,40 +1420,30 @@ static u32 cxd2841er_mon_read_ber_s(struct cxd2841er_priv *priv) */ cxd2841er_read_regs(priv, I2C_SLVT, 0x35, data, 11); if (data[0] & 0x01) { - bit_error = ((u32)(data[1] & 0x3F) << 16) | - ((u32)(data[2] & 0xFF) << 8) | - (u32)(data[3] & 0xFF); - bit_count = ((u32)(data[8] & 0x3F) << 16) | - ((u32)(data[9] & 0xFF) << 8) | - (u32)(data[10] & 0xFF); - /* - * BER = bitError / bitCount - * = (bitError * 10^7) / bitCount - * = ((bitError * 625 * 125 * 128) / bitCount - */ - if ((bit_count == 0) || (bit_error > bit_count)) { + *bit_error = ((u32)(data[1] & 0x3F) << 16) | + ((u32)(data[2] & 0xFF) << 8) | + (u32)(data[3] & 0xFF); + *bit_count = ((u32)(data[8] & 0x3F) << 16) | + ((u32)(data[9] & 0xFF) << 8) | + (u32)(data[10] & 0xFF); + if ((*bit_count == 0) || (*bit_error > *bit_count)) { dev_dbg(&priv->i2c->dev, "%s(): invalid bit_error %d, bit_count %d\n", - __func__, bit_error, bit_count); - return 0; + __func__, *bit_error, *bit_count); + return -EINVAL; } - temp_q = div_u64_rem(10000000ULL * bit_error, - bit_count, &temp_r); - if (bit_count != 1 && temp_r >= bit_count / 2) - temp_q++; - return temp_q; + return 0; } dev_dbg(&priv->i2c->dev, "%s(): no data available\n", __func__); - return 0; + return -EINVAL; } -static u32 cxd2841er_mon_read_ber_s2(struct cxd2841er_priv *priv) +static int cxd2841er_mon_read_ber_s2(struct cxd2841er_priv *priv, + u32 *bit_error, u32 *bit_count) { u8 data[5]; - u32 bit_error, period; - u32 temp_q, temp_r; - u32 result = 0; + u32 period; /* Set SLV-T Bank : 0xB2 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0xb2); @@ -1164,10 +1458,10 @@ static u32 cxd2841er_mon_read_ber_s2(struct cxd2841er_priv *priv) cxd2841er_read_regs(priv, I2C_SLVT, 0x30, data, 5); if (data[0] & 0x01) { /* Bit error count */ - bit_error = ((u32)(data[1] & 0x0F) << 24) | - ((u32)(data[2] & 0xFF) << 16) | - ((u32)(data[3] & 0xFF) << 8) | - (u32)(data[4] & 0xFF); + *bit_error = ((u32)(data[1] & 0x0F) << 24) | + ((u32)(data[2] & 0xFF) << 16) | + ((u32)(data[3] & 0xFF) << 8) | + (u32)(data[4] & 0xFF); /* Set SLV-T Bank : 0xA0 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0xa0); @@ -1177,40 +1471,30 @@ static u32 cxd2841er_mon_read_ber_s2(struct cxd2841er_priv *priv) if (period == 0) { dev_dbg(&priv->i2c->dev, "%s(): period is 0\n", __func__); - return 0; + return -EINVAL; } - if (bit_error > (period * 64800)) { + if (*bit_error > (period * 64800)) { dev_dbg(&priv->i2c->dev, "%s(): invalid bit_err 0x%x period 0x%x\n", - __func__, bit_error, period); - return 0; + __func__, *bit_error, period); + return -EINVAL; } - /* - * BER = bitError / (period * 64800) - * = (bitError * 10^7) / (period * 64800) - * = (bitError * 10^5) / (period * 648) - * = (bitError * 12500) / (period * 81) - * = (bitError * 10) * 1250 / (period * 81) - */ - temp_q = div_u64_rem(12500ULL * bit_error, - period * 81, &temp_r); - if (temp_r >= period * 40) - temp_q++; - result = temp_q; + *bit_count = period * 64800; + + return 0; } else { 
dev_dbg(&priv->i2c->dev, "%s(): no data available\n", __func__); } - return result; + return -EINVAL; }
-static int cxd2841er_read_ber_t2(struct cxd2841er_priv *priv, u32 *ber) +static int cxd2841er_read_ber_t2(struct cxd2841er_priv *priv, + u32 *bit_error, u32 *bit_count) { u8 data[4]; - u32 div, q, r; - u32 bit_err, period_exp, n_ldpc; + u32 period_exp, n_ldpc; - *ber = 0; if (priv->state != STATE_ACTIVE_TC) { dev_dbg(&priv->i2c->dev, "%s(): invalid state %d\n", __func__, priv->state);
@@ -1221,40 +1505,44 @@ static int cxd2841er_read_ber_t2(struct cxd2841er_priv *priv, u32 *ber) if (!(data[0] & 0x10)) { dev_dbg(&priv->i2c->dev, "%s(): no valid BER data\n", __func__); - return 0; + return -EINVAL; } - bit_err = ((u32)(data[0] & 0x0f) << 24) | - ((u32)data[1] << 16) | - ((u32)data[2] << 8) | - (u32)data[3]; + *bit_error = ((u32)(data[0] & 0x0f) << 24) | + ((u32)data[1] << 16) | + ((u32)data[2] << 8) | + (u32)data[3]; cxd2841er_read_reg(priv, I2C_SLVT, 0x6f, data); period_exp = data[0] & 0x0f; cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x22); cxd2841er_read_reg(priv, I2C_SLVT, 0x5e, data); n_ldpc = ((data[0] & 0x03) == 0 ? 16200 : 64800); - if (bit_err > ((1U << period_exp) * n_ldpc)) { + if (*bit_error > ((1U << period_exp) * n_ldpc)) { dev_dbg(&priv->i2c->dev, "%s(): invalid BER value\n", __func__); return -EINVAL; } + + /* + * FIXME: the right thing would be to return bit_error untouched, + * but, as we don't know the scale returned by the counters, let's + * at least preserve BER = bit_error/bit_count. + */ if (period_exp >= 4) { - div = (1U << (period_exp - 4)) * (n_ldpc / 200); - q = div_u64_rem(3125ULL * bit_err, div, &r); + *bit_count = (1U << (period_exp - 4)) * (n_ldpc / 200); + *bit_error *= 3125ULL; } else { - div = (1U << period_exp) * (n_ldpc / 200); - q = div_u64_rem(50000ULL * bit_err, div, &r); + *bit_count = (1U << period_exp) * (n_ldpc / 200); + *bit_error *= 50000ULL; } - *ber = (r >= div / 2) ? q + 1 : q; return 0; }
-static int cxd2841er_read_ber_t(struct cxd2841er_priv *priv, u32 *ber) +static int cxd2841er_read_ber_t(struct cxd2841er_priv *priv, + u32 *bit_error, u32 *bit_count) { u8 data[2]; - u32 div, q, r; - u32 bit_err, period; + u32 period; - *ber = 0; if (priv->state != STATE_ACTIVE_TC) { dev_dbg(&priv->i2c->dev, "%s(): invalid state %d\n", __func__, priv->state);
@@ -1268,16 +1556,22 @@ static int cxd2841er_read_ber_t(struct cxd2841er_priv *priv, u32 *ber) return 0; } cxd2841er_read_regs(priv, I2C_SLVT, 0x22, data, sizeof(data)); - bit_err = ((u32)data[0] << 8) | (u32)data[1]; + *bit_error = ((u32)data[0] << 8) | (u32)data[1]; cxd2841er_read_reg(priv, I2C_SLVT, 0x6f, data); period = ((data[0] & 0x07) == 0) ? 256 : (4096 << (data[0] & 0x07)); - div = period / 128; - q = div_u64_rem(78125ULL * bit_err, div, &r); - *ber = (r >= div / 2) ? q + 1 : q; +
+ /* + * FIXME: the right thing would be to return bit_error untouched, + * but, as we don't know the scale returned by the counters, let's + * at least preserve BER = bit_error/bit_count.
+ */ + *bit_count = period / 128; + *bit_error *= 78125ULL; return 0; } -static u32 cxd2841er_dvbs_read_snr(struct cxd2841er_priv *priv, u8 delsys) +static u32 cxd2841er_dvbs_read_snr(struct cxd2841er_priv *priv, + u8 delsys, u32 *snr) { u8 data[3]; u32 res = 0, value; @@ -1335,9 +1629,71 @@ static u32 cxd2841er_dvbs_read_snr(struct cxd2841er_priv *priv, u8 delsys) } else { dev_dbg(&priv->i2c->dev, "%s(): no data available\n", __func__); + return -EINVAL; } done: - return res; + *snr = res; + return 0; +} + +static uint32_t sony_log(uint32_t x) +{ + return (((10000>>8)*(intlog2(x)>>16) + LOG2_E_100X/2)/LOG2_E_100X); +} + +static int cxd2841er_read_snr_c(struct cxd2841er_priv *priv, u32 *snr) +{ + u32 reg; + u8 data[2]; + enum sony_dvbc_constellation_t qam = SONY_DVBC_CONSTELLATION_16QAM; + + *snr = 0; + if (priv->state != STATE_ACTIVE_TC) { + dev_dbg(&priv->i2c->dev, + "%s(): invalid state %d\n", + __func__, priv->state); + return -EINVAL; + } + + /* + * Freeze registers: ensure multiple separate register reads + * are from the same snapshot + */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x01, 0x01); + + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x40); + cxd2841er_read_regs(priv, I2C_SLVT, 0x19, data, 1); + qam = (enum sony_dvbc_constellation_t) (data[0] & 0x07); + cxd2841er_read_regs(priv, I2C_SLVT, 0x4C, data, 2); + + reg = ((u32)(data[0]&0x1f) << 8) | (u32)data[1]; + if (reg == 0) { + dev_dbg(&priv->i2c->dev, + "%s(): reg value out of range\n", __func__); + return 0; + } + + switch (qam) { + case SONY_DVBC_CONSTELLATION_16QAM: + case SONY_DVBC_CONSTELLATION_64QAM: + case SONY_DVBC_CONSTELLATION_256QAM: + /* SNR(dB) = -9.50 * ln(IREG_SNR_ESTIMATE / (24320)) */ + if (reg < 126) + reg = 126; + *snr = -95 * (int32_t)sony_log(reg) + 95941; + break; + case SONY_DVBC_CONSTELLATION_32QAM: + case SONY_DVBC_CONSTELLATION_128QAM: + /* SNR(dB) = -8.75 * ln(IREG_SNR_ESTIMATE / (20800)) */ + if (reg < 69) + reg = 69; + *snr = -88 * (int32_t)sony_log(reg) + 86999; + break; + default: + return -EINVAL; + } + + return 0; } static int cxd2841er_read_snr_t(struct cxd2841er_priv *priv, u32 *snr) @@ -1391,14 +1747,80 @@ static int cxd2841er_read_snr_t2(struct cxd2841er_priv *priv, u32 *snr) return 0; } -static u16 cxd2841er_read_agc_gain_t_t2(struct cxd2841er_priv *priv, - u8 delsys) +static int cxd2841er_read_snr_i(struct cxd2841er_priv *priv, u32 *snr) +{ + u32 reg; + u8 data[2]; + + *snr = 0; + if (priv->state != STATE_ACTIVE_TC) { + dev_dbg(&priv->i2c->dev, + "%s(): invalid state %d\n", __func__, + priv->state); + return -EINVAL; + } + + /* Freeze all registers */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x01, 0x01); + + + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x60); + cxd2841er_read_regs(priv, I2C_SLVT, 0x28, data, sizeof(data)); + reg = ((u32)data[0] << 8) | (u32)data[1]; + if (reg == 0) { + dev_dbg(&priv->i2c->dev, + "%s(): reg value out of range\n", __func__); + return 0; + } + if (reg > 4996) + reg = 4996; + *snr = 100 * intlog10(reg) - 9031; + return 0; +} + +static u16 cxd2841er_read_agc_gain_c(struct cxd2841er_priv *priv, + u8 delsys) +{ + u8 data[2]; + + cxd2841er_write_reg( + priv, I2C_SLVT, 0x00, 0x40); + cxd2841er_read_regs(priv, I2C_SLVT, 0x49, data, 2); + dev_dbg(&priv->i2c->dev, + "%s(): AGC value=%u\n", + __func__, (((u16)data[0] & 0x0F) << 8) | + (u16)(data[1] & 0xFF)); + return ((((u16)data[0] & 0x0F) << 8) | (u16)(data[1] & 0xFF)) << 4; +} + +static u16 cxd2841er_read_agc_gain_t_t2(struct cxd2841er_priv *priv, + u8 delsys) { u8 data[2]; cxd2841er_write_reg( priv, I2C_SLVT, 0x00, (delsys 
== SYS_DVBT ? 0x10 : 0x20)); cxd2841er_read_regs(priv, I2C_SLVT, 0x26, data, 2); + dev_dbg(&priv->i2c->dev, + "%s(): AGC value=%u\n", + __func__, (((u16)data[0] & 0x0F) << 8) | + (u16)(data[1] & 0xFF)); return ((((u16)data[0] & 0x0F) << 8) | (u16)(data[1] & 0xFF)) << 4; }
+ +static u16 cxd2841er_read_agc_gain_i(struct cxd2841er_priv *priv, + u8 delsys) +{ + u8 data[2]; + + cxd2841er_write_reg( + priv, I2C_SLVT, 0x00, 0x60); + cxd2841er_read_regs(priv, I2C_SLVT, 0x26, data, 2); + + dev_dbg(&priv->i2c->dev, + "%s(): AGC value=%u\n", + __func__, (((u16)data[0] & 0x0F) << 8) | + (u16)(data[1] & 0xFF)); return ((((u16)data[0] & 0x0F) << 8) | (u16)(data[1] & 0xFF)) << 4; }
@@ -1417,101 +1839,170 @@ static u16 cxd2841er_read_agc_gain_s(struct cxd2841er_priv *priv) return ((((u16)data[0] & 0x1F) << 8) | (u16)(data[1] & 0xFF)) << 3; }
-static int cxd2841er_read_ber(struct dvb_frontend *fe, u32 *ber) +static void cxd2841er_read_ber(struct dvb_frontend *fe) { struct dtv_frontend_properties *p = &fe->dtv_property_cache; struct cxd2841er_priv *priv = fe->demodulator_priv; + u32 ret, bit_error = 0, bit_count = 0; dev_dbg(&priv->i2c->dev, "%s()\n", __func__); - *ber = 0; switch (p->delivery_system) { + case SYS_DVBC_ANNEX_A: + case SYS_DVBC_ANNEX_B: + case SYS_DVBC_ANNEX_C: + ret = cxd2841er_read_ber_c(priv, &bit_error, &bit_count); + break; case SYS_DVBS: - *ber = cxd2841er_mon_read_ber_s(priv); + ret = cxd2841er_mon_read_ber_s(priv, &bit_error, &bit_count); break; case SYS_DVBS2: - *ber = cxd2841er_mon_read_ber_s2(priv); + ret = cxd2841er_mon_read_ber_s2(priv, &bit_error, &bit_count); break; case SYS_DVBT: - return cxd2841er_read_ber_t(priv, ber); + ret = cxd2841er_read_ber_t(priv, &bit_error, &bit_count); + break; case SYS_DVBT2: - return cxd2841er_read_ber_t2(priv, ber); - default: - *ber = 0; + ret = cxd2841er_read_ber_t2(priv, &bit_error, &bit_count); break; + default: + p->post_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + p->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + return; + } + + if (!ret) { + p->post_bit_error.stat[0].scale = FE_SCALE_COUNTER; + p->post_bit_error.stat[0].uvalue += bit_error; + p->post_bit_count.stat[0].scale = FE_SCALE_COUNTER; + p->post_bit_count.stat[0].uvalue += bit_count; + } else { + p->post_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + p->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; } - return 0; }
-static int cxd2841er_read_signal_strength(struct dvb_frontend *fe, - u16 *strength) +static void cxd2841er_read_signal_strength(struct dvb_frontend *fe) { struct dtv_frontend_properties *p = &fe->dtv_property_cache; struct cxd2841er_priv *priv = fe->demodulator_priv; + s32 strength; dev_dbg(&priv->i2c->dev, "%s()\n", __func__); switch (p->delivery_system) { case SYS_DVBT: case SYS_DVBT2: - *strength = 65535 - cxd2841er_read_agc_gain_t_t2( - priv, p->delivery_system); + strength = cxd2841er_read_agc_gain_t_t2(priv, + p->delivery_system); + p->strength.stat[0].scale = FE_SCALE_DECIBEL; + /* Formula was empirically determined @ 410 MHz */ + p->strength.stat[0].uvalue = strength * 366 / 100 - 89520; + break; /* Code moved out of the function */ + case SYS_DVBC_ANNEX_A: + case SYS_DVBC_ANNEX_B: + case SYS_DVBC_ANNEX_C: + strength = cxd2841er_read_agc_gain_c(priv, + p->delivery_system); + p->strength.stat[0].scale = FE_SCALE_DECIBEL; + /* + * Formula was empirically determined via linear regression, + * using frequencies: 175 MHz, 410 MHz and 800 MHz, and a + * stream modulated with QAM64 + */ + p->strength.stat[0].uvalue = strength * 4045 / 1000 - 85224; + break; + case SYS_ISDBT: + strength = cxd2841er_read_agc_gain_i(priv, p->delivery_system); + p->strength.stat[0].scale = FE_SCALE_DECIBEL; + /* + * Formula was empirically determined via linear regression, + * using frequencies: 175 MHz, 410 MHz and 800 MHz. + */ + p->strength.stat[0].uvalue = strength * 3775 / 1000 - 90185; break; case SYS_DVBS: case SYS_DVBS2: - *strength = 65535 - cxd2841er_read_agc_gain_s(priv); + strength = 65535 - cxd2841er_read_agc_gain_s(priv); + p->strength.stat[0].scale = FE_SCALE_RELATIVE; + p->strength.stat[0].uvalue = strength; break; default: - *strength = 0; + p->strength.stat[0].scale = FE_SCALE_NOT_AVAILABLE; break; } - return 0; }
-static int cxd2841er_read_snr(struct dvb_frontend *fe, u16 *snr) +static void cxd2841er_read_snr(struct dvb_frontend *fe) { u32 tmp = 0; + int ret = 0; struct dtv_frontend_properties *p = &fe->dtv_property_cache; struct cxd2841er_priv *priv = fe->demodulator_priv; dev_dbg(&priv->i2c->dev, "%s()\n", __func__); switch (p->delivery_system) { + case SYS_DVBC_ANNEX_A: + case SYS_DVBC_ANNEX_B: + case SYS_DVBC_ANNEX_C: + ret = cxd2841er_read_snr_c(priv, &tmp); + break; case SYS_DVBT: - cxd2841er_read_snr_t(priv, &tmp); + ret = cxd2841er_read_snr_t(priv, &tmp); break; case SYS_DVBT2: - cxd2841er_read_snr_t2(priv, &tmp); + ret = cxd2841er_read_snr_t2(priv, &tmp); + break; + case SYS_ISDBT: + ret = cxd2841er_read_snr_i(priv, &tmp); break; case SYS_DVBS: case SYS_DVBS2: - tmp = cxd2841er_dvbs_read_snr(priv, p->delivery_system); + ret = cxd2841er_dvbs_read_snr(priv, p->delivery_system, &tmp); break; default: dev_dbg(&priv->i2c->dev, "%s(): unknown delivery system %d\n", __func__, p->delivery_system); - break; + p->cnr.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + return; + } + + if (!ret) { + p->cnr.stat[0].scale = FE_SCALE_DECIBEL; + p->cnr.stat[0].svalue = tmp; + } else { + p->cnr.stat[0].scale = FE_SCALE_NOT_AVAILABLE; } - *snr = tmp & 0xffff; - return 0; }
-static int cxd2841er_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) +static void cxd2841er_read_ucblocks(struct dvb_frontend *fe) { struct dtv_frontend_properties *p = &fe->dtv_property_cache; struct cxd2841er_priv *priv = fe->demodulator_priv; + u32 ucblocks; dev_dbg(&priv->i2c->dev, "%s()\n", __func__); switch (p->delivery_system) { + case SYS_DVBC_ANNEX_A: + case SYS_DVBC_ANNEX_B: + case SYS_DVBC_ANNEX_C: + cxd2841er_read_packet_errors_c(priv, &ucblocks); + break; case SYS_DVBT: - cxd2841er_read_packet_errors_t(priv, ucblocks); + cxd2841er_read_packet_errors_t(priv, &ucblocks); break; case SYS_DVBT2: - cxd2841er_read_packet_errors_t2(priv, ucblocks); + cxd2841er_read_packet_errors_t2(priv, &ucblocks); break; - default: - *ucblocks = 0; + case SYS_ISDBT: + cxd2841er_read_packet_errors_i(priv, &ucblocks); break; + default: + p->block_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + return; } dev_dbg(&priv->i2c->dev, "%s()\n", __func__); - return 0; + + p->block_error.stat[0].scale = FE_SCALE_COUNTER; + p->block_error.stat[0].uvalue = ucblocks; }
static int cxd2841er_dvbt2_set_profile( @@ -1524,15 +2015,18 @@ static int cxd2841er_dvbt2_set_profile( switch (profile) { case DVBT2_PROFILE_BASE: tune_mode = 0x01; - seq_not2d_time = 12; + /* Set early unlock time */ + seq_not2d_time = (priv->xtal == SONY_XTAL_24000)?0x0E:0x0C; break; case DVBT2_PROFILE_LITE: tune_mode = 0x05; - seq_not2d_time = 40; + /* Set early unlock time */ + seq_not2d_time = (priv->xtal == SONY_XTAL_24000)?0x2E:0x28; break; case DVBT2_PROFILE_ANY: tune_mode = 0x00; - seq_not2d_time = 40; +
/* Set early unlock time */ + seq_not2d_time = (priv->xtal == SONY_XTAL_24000)?0x2E:0x28; break; default: return -EINVAL; @@ -1574,254 +2068,617 @@ static int cxd2841er_sleep_tc_to_active_t2_band(struct cxd2841er_priv *priv, u32 bandwidth) { u32 iffreq; - u8 b20_9f[5]; - u8 b10_a6[14]; - u8 b10_b6[3]; - u8 b10_d7; + u8 data[MAX_WRITE_REGSIZE]; + + const uint8_t nominalRate8bw[3][5] = { + /* TRCG Nominal Rate [37:0] */ + {0x11, 0xF0, 0x00, 0x00, 0x00}, /* 20.5MHz XTal */ + {0x15, 0x00, 0x00, 0x00, 0x00}, /* 24MHz XTal */ + {0x11, 0xF0, 0x00, 0x00, 0x00} /* 41MHz XTal */ + }; + + const uint8_t nominalRate7bw[3][5] = { + /* TRCG Nominal Rate [37:0] */ + {0x14, 0x80, 0x00, 0x00, 0x00}, /* 20.5MHz XTal */ + {0x18, 0x00, 0x00, 0x00, 0x00}, /* 24MHz XTal */ + {0x14, 0x80, 0x00, 0x00, 0x00} /* 41MHz XTal */ + }; + + const uint8_t nominalRate6bw[3][5] = { + /* TRCG Nominal Rate [37:0] */ + {0x17, 0xEA, 0xAA, 0xAA, 0xAA}, /* 20.5MHz XTal */ + {0x1C, 0x00, 0x00, 0x00, 0x00}, /* 24MHz XTal */ + {0x17, 0xEA, 0xAA, 0xAA, 0xAA} /* 41MHz XTal */ + }; + + const uint8_t nominalRate5bw[3][5] = { + /* TRCG Nominal Rate [37:0] */ + {0x1C, 0xB3, 0x33, 0x33, 0x33}, /* 20.5MHz XTal */ + {0x21, 0x99, 0x99, 0x99, 0x99}, /* 24MHz XTal */ + {0x1C, 0xB3, 0x33, 0x33, 0x33} /* 41MHz XTal */ + }; + + const uint8_t nominalRate17bw[3][5] = { + /* TRCG Nominal Rate [37:0] */ + {0x58, 0xE2, 0xAF, 0xE0, 0xBC}, /* 20.5MHz XTal */ + {0x68, 0x0F, 0xA2, 0x32, 0xD0}, /* 24MHz XTal */ + {0x58, 0xE2, 0xAF, 0xE0, 0xBC} /* 41MHz XTal */ + }; + + const uint8_t itbCoef8bw[3][14] = { + {0x26, 0xAF, 0x06, 0xCD, 0x13, 0xBB, 0x28, 0xBA, + 0x23, 0xA9, 0x1F, 0xA8, 0x2C, 0xC8}, /* 20.5MHz XTal */ + {0x2F, 0xBA, 0x28, 0x9B, 0x28, 0x9D, 0x28, 0xA1, + 0x29, 0xA5, 0x2A, 0xAC, 0x29, 0xB5}, /* 24MHz XTal */ + {0x26, 0xAF, 0x06, 0xCD, 0x13, 0xBB, 0x28, 0xBA, + 0x23, 0xA9, 0x1F, 0xA8, 0x2C, 0xC8} /* 41MHz XTal */ + }; + + const uint8_t itbCoef7bw[3][14] = { + {0x2C, 0xBD, 0x02, 0xCF, 0x04, 0xF8, 0x23, 0xA6, + 0x29, 0xB0, 0x26, 0xA9, 0x21, 0xA5}, /* 20.5MHz XTal */ + {0x30, 0xB1, 0x29, 0x9A, 0x28, 0x9C, 0x28, 0xA0, + 0x29, 0xA2, 0x2B, 0xA6, 0x2B, 0xAD}, /* 24MHz XTal */ + {0x2C, 0xBD, 0x02, 0xCF, 0x04, 0xF8, 0x23, 0xA6, + 0x29, 0xB0, 0x26, 0xA9, 0x21, 0xA5} /* 41MHz XTal */ + }; + + const uint8_t itbCoef6bw[3][14] = { + {0x27, 0xA7, 0x28, 0xB3, 0x02, 0xF0, 0x01, 0xE8, + 0x00, 0xCF, 0x00, 0xE6, 0x23, 0xA4}, /* 20.5MHz XTal */ + {0x31, 0xA8, 0x29, 0x9B, 0x27, 0x9C, 0x28, 0x9E, + 0x29, 0xA4, 0x29, 0xA2, 0x29, 0xA8}, /* 24MHz XTal */ + {0x27, 0xA7, 0x28, 0xB3, 0x02, 0xF0, 0x01, 0xE8, + 0x00, 0xCF, 0x00, 0xE6, 0x23, 0xA4} /* 41MHz XTal */ + }; + + const uint8_t itbCoef5bw[3][14] = { + {0x27, 0xA7, 0x28, 0xB3, 0x02, 0xF0, 0x01, 0xE8, + 0x00, 0xCF, 0x00, 0xE6, 0x23, 0xA4}, /* 20.5MHz XTal */ + {0x31, 0xA8, 0x29, 0x9B, 0x27, 0x9C, 0x28, 0x9E, + 0x29, 0xA4, 0x29, 0xA2, 0x29, 0xA8}, /* 24MHz XTal */ + {0x27, 0xA7, 0x28, 0xB3, 0x02, 0xF0, 0x01, 0xE8, + 0x00, 0xCF, 0x00, 0xE6, 0x23, 0xA4} /* 41MHz XTal */ + }; + + const uint8_t itbCoef17bw[3][14] = { + {0x25, 0xA0, 0x36, 0x8D, 0x2E, 0x94, 0x28, 0x9B, + 0x32, 0x90, 0x2C, 0x9D, 0x29, 0x99}, /* 20.5MHz XTal */ + {0x33, 0x8E, 0x2B, 0x97, 0x2D, 0x95, 0x37, 0x8B, + 0x30, 0x97, 0x2D, 0x9A, 0x21, 0xA4}, /* 24MHz XTal */ + {0x25, 0xA0, 0x36, 0x8D, 0x2E, 0x94, 0x28, 0x9B, + 0x32, 0x90, 0x2C, 0x9D, 0x29, 0x99} /* 41MHz XTal */ + }; + + /* Set SLV-T Bank : 0x20 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x20); - dev_dbg(&priv->i2c->dev, "%s()\n", __func__); switch (bandwidth) { case 8000000: - /* bank 0x20, reg 0x9f */ - 
b20_9f[0] = 0x11; - b20_9f[1] = 0xf0; - b20_9f[2] = 0x00; - b20_9f[3] = 0x00; - b20_9f[4] = 0x00; - /* bank 0x10, reg 0xa6 */ - b10_a6[0] = 0x26; - b10_a6[1] = 0xaf; - b10_a6[2] = 0x06; - b10_a6[3] = 0xcd; - b10_a6[4] = 0x13; - b10_a6[5] = 0xbb; - b10_a6[6] = 0x28; - b10_a6[7] = 0xba; - b10_a6[8] = 0x23; - b10_a6[9] = 0xa9; - b10_a6[10] = 0x1f; - b10_a6[11] = 0xa8; - b10_a6[12] = 0x2c; - b10_a6[13] = 0xc8; - iffreq = MAKE_IFFREQ_CONFIG(4.80); - b10_d7 = 0x00; + /* */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0x9F, nominalRate8bw[priv->xtal], 5); + + /* Set SLV-T Bank : 0x27 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x27); + cxd2841er_set_reg_bits(priv, I2C_SLVT, + 0x7a, 0x00, 0x0f); + + /* Set SLV-T Bank : 0x10 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); + + /* Group delay equaliser settings for + * ASCOT2D, ASCOT2E and ASCOT3 tuners + */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0xA6, itbCoef8bw[priv->xtal], 14); + /* */ + iffreq = MAKE_IFFREQ_CONFIG_XTAL(priv->xtal, 4.80); + data[0] = (u8) ((iffreq >> 16) & 0xff); + data[1] = (u8)((iffreq >> 8) & 0xff); + data[2] = (u8)(iffreq & 0xff); + cxd2841er_write_regs(priv, I2C_SLVT, 0xB6, data, 3); + /* System bandwidth setting */ + cxd2841er_set_reg_bits( + priv, I2C_SLVT, 0xD7, 0x00, 0x07); break; case 7000000: - /* bank 0x20, reg 0x9f */ - b20_9f[0] = 0x14; - b20_9f[1] = 0x80; - b20_9f[2] = 0x00; - b20_9f[3] = 0x00; - b20_9f[4] = 0x00; - /* bank 0x10, reg 0xa6 */ - b10_a6[0] = 0x2C; - b10_a6[1] = 0xBD; - b10_a6[2] = 0x02; - b10_a6[3] = 0xCF; - b10_a6[4] = 0x04; - b10_a6[5] = 0xF8; - b10_a6[6] = 0x23; - b10_a6[7] = 0xA6; - b10_a6[8] = 0x29; - b10_a6[9] = 0xB0; - b10_a6[10] = 0x26; - b10_a6[11] = 0xA9; - b10_a6[12] = 0x21; - b10_a6[13] = 0xA5; - iffreq = MAKE_IFFREQ_CONFIG(4.2); - b10_d7 = 0x02; + /* */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0x9F, nominalRate7bw[priv->xtal], 5); + + /* Set SLV-T Bank : 0x27 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x27); + cxd2841er_set_reg_bits(priv, I2C_SLVT, + 0x7a, 0x00, 0x0f); + + /* Set SLV-T Bank : 0x10 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); + + /* Group delay equaliser settings for + * ASCOT2D, ASCOT2E and ASCOT3 tuners + */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0xA6, itbCoef7bw[priv->xtal], 14); + /* */ + iffreq = MAKE_IFFREQ_CONFIG_XTAL(priv->xtal, 4.20); + data[0] = (u8) ((iffreq >> 16) & 0xff); + data[1] = (u8)((iffreq >> 8) & 0xff); + data[2] = (u8)(iffreq & 0xff); + cxd2841er_write_regs(priv, I2C_SLVT, 0xB6, data, 3); + /* System bandwidth setting */ + cxd2841er_set_reg_bits( + priv, I2C_SLVT, 0xD7, 0x02, 0x07); break; case 6000000: - /* bank 0x20, reg 0x9f */ - b20_9f[0] = 0x17; - b20_9f[1] = 0xEA; - b20_9f[2] = 0xAA; - b20_9f[3] = 0xAA; - b20_9f[4] = 0xAA; - /* bank 0x10, reg 0xa6 */ - b10_a6[0] = 0x27; - b10_a6[1] = 0xA7; - b10_a6[2] = 0x28; - b10_a6[3] = 0xB3; - b10_a6[4] = 0x02; - b10_a6[5] = 0xF0; - b10_a6[6] = 0x01; - b10_a6[7] = 0xE8; - b10_a6[8] = 0x00; - b10_a6[9] = 0xCF; - b10_a6[10] = 0x00; - b10_a6[11] = 0xE6; - b10_a6[12] = 0x23; - b10_a6[13] = 0xA4; - iffreq = MAKE_IFFREQ_CONFIG(3.6); - b10_d7 = 0x04; + /* */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0x9F, nominalRate6bw[priv->xtal], 5); + + /* Set SLV-T Bank : 0x27 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x27); + cxd2841er_set_reg_bits(priv, I2C_SLVT, + 0x7a, 0x00, 0x0f); + + /* Set SLV-T Bank : 0x10 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); + + /* Group delay equaliser settings for + * ASCOT2D, ASCOT2E and ASCOT3 tuners + */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0xA6, 
itbCoef6bw[priv->xtal], 14); + /* */ + iffreq = MAKE_IFFREQ_CONFIG_XTAL(priv->xtal, 3.60); + data[0] = (u8) ((iffreq >> 16) & 0xff); + data[1] = (u8)((iffreq >> 8) & 0xff); + data[2] = (u8)(iffreq & 0xff); + cxd2841er_write_regs(priv, I2C_SLVT, 0xB6, data, 3); + /* System bandwidth setting */ + cxd2841er_set_reg_bits( + priv, I2C_SLVT, 0xD7, 0x04, 0x07); break; case 5000000: - /* bank 0x20, reg 0x9f */ - b20_9f[0] = 0x1C; - b20_9f[1] = 0xB3; - b20_9f[2] = 0x33; - b20_9f[3] = 0x33; - b20_9f[4] = 0x33; - /* bank 0x10, reg 0xa6 */ - b10_a6[0] = 0x27; - b10_a6[1] = 0xA7; - b10_a6[2] = 0x28; - b10_a6[3] = 0xB3; - b10_a6[4] = 0x02; - b10_a6[5] = 0xF0; - b10_a6[6] = 0x01; - b10_a6[7] = 0xE8; - b10_a6[8] = 0x00; - b10_a6[9] = 0xCF; - b10_a6[10] = 0x00; - b10_a6[11] = 0xE6; - b10_a6[12] = 0x23; - b10_a6[13] = 0xA4; - iffreq = MAKE_IFFREQ_CONFIG(3.6); - b10_d7 = 0x06; + /* */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0x9F, nominalRate5bw[priv->xtal], 5); + + /* Set SLV-T Bank : 0x27 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x27); + cxd2841er_set_reg_bits(priv, I2C_SLVT, + 0x7a, 0x00, 0x0f); + + /* Set SLV-T Bank : 0x10 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); + + /* Group delay equaliser settings for + * ASCOT2D, ASCOT2E and ASCOT3 tuners + */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0xA6, itbCoef5bw[priv->xtal], 14); + /* */ + iffreq = MAKE_IFFREQ_CONFIG_XTAL(priv->xtal, 3.60); + data[0] = (u8) ((iffreq >> 16) & 0xff); + data[1] = (u8)((iffreq >> 8) & 0xff); + data[2] = (u8)(iffreq & 0xff); + cxd2841er_write_regs(priv, I2C_SLVT, 0xB6, data, 3); + /* System bandwidth setting */ + cxd2841er_set_reg_bits( + priv, I2C_SLVT, 0xD7, 0x06, 0x07); break; case 1712000: - /* bank 0x20, reg 0x9f */ - b20_9f[0] = 0x58; - b20_9f[1] = 0xE2; - b20_9f[2] = 0xAF; - b20_9f[3] = 0xE0; - b20_9f[4] = 0xBC; - /* bank 0x10, reg 0xa6 */ - b10_a6[0] = 0x25; - b10_a6[1] = 0xA0; - b10_a6[2] = 0x36; - b10_a6[3] = 0x8D; - b10_a6[4] = 0x2E; - b10_a6[5] = 0x94; - b10_a6[6] = 0x28; - b10_a6[7] = 0x9B; - b10_a6[8] = 0x32; - b10_a6[9] = 0x90; - b10_a6[10] = 0x2C; - b10_a6[11] = 0x9D; - b10_a6[12] = 0x29; - b10_a6[13] = 0x99; - iffreq = MAKE_IFFREQ_CONFIG(3.5); - b10_d7 = 0x03; + /* */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0x9F, nominalRate17bw[priv->xtal], 5); + + /* Set SLV-T Bank : 0x27 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x27); + cxd2841er_set_reg_bits(priv, I2C_SLVT, + 0x7a, 0x03, 0x0f); + + /* Set SLV-T Bank : 0x10 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); + + /* Group delay equaliser settings for + * ASCOT2D, ASCOT2E and ASCOT3 tuners + */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0xA6, itbCoef17bw[priv->xtal], 14); + /* */ + iffreq = MAKE_IFFREQ_CONFIG_XTAL(priv->xtal, 3.50); + data[0] = (u8) ((iffreq >> 16) & 0xff); + data[1] = (u8)((iffreq >> 8) & 0xff); + data[2] = (u8)(iffreq & 0xff); + cxd2841er_write_regs(priv, I2C_SLVT, 0xB6, data, 3); + /* System bandwidth setting */ + cxd2841er_set_reg_bits( + priv, I2C_SLVT, 0xD7, 0x03, 0x07); break; default: return -EINVAL; } - /* Set SLV-T Bank : 0x20 */ - cxd2841er_write_reg(priv, I2C_SLVX, 0x00, 0x20); - cxd2841er_write_regs(priv, I2C_SLVT, 0x9f, b20_9f, sizeof(b20_9f)); - /* Set SLV-T Bank : 0x27 */ - cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x27); - cxd2841er_set_reg_bits( - priv, I2C_SLVT, 0x7a, - (bandwidth == 1712000 ? 0x03 : 0x00), 0x0f); - /* Set SLV-T Bank : 0x10 */ - cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); - /* Group delay equaliser sett. 
for ASCOT2E */ - cxd2841er_write_regs(priv, I2C_SLVT, 0xa6, b10_a6, sizeof(b10_a6)); - /* */ - b10_b6[0] = (u8) ((iffreq >> 16) & 0xff); - b10_b6[1] = (u8)((iffreq >> 8) & 0xff); - b10_b6[2] = (u8)(iffreq & 0xff); - cxd2841er_write_regs(priv, I2C_SLVT, 0xb6, b10_b6, sizeof(b10_b6)); - /* System bandwidth setting */ - cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xd7, b10_d7, 0x07); return 0; } static int cxd2841er_sleep_tc_to_active_t_band( struct cxd2841er_priv *priv, u32 bandwidth) { - u8 b13_9c[2] = { 0x01, 0x14 }; - u8 bw8mhz_b10_9f[] = { 0x11, 0xF0, 0x00, 0x00, 0x00 }; - u8 bw8mhz_b10_a6[] = { 0x26, 0xAF, 0x06, 0xCD, 0x13, 0xBB, - 0x28, 0xBA, 0x23, 0xA9, 0x1F, 0xA8, 0x2C, 0xC8 }; - u8 bw8mhz_b10_d9[] = { 0x01, 0xE0 }; - u8 bw8mhz_b17_38[] = { 0x01, 0x02 }; - u8 bw7mhz_b10_9f[] = { 0x14, 0x80, 0x00, 0x00, 0x00 }; - u8 bw7mhz_b10_a6[] = { 0x2C, 0xBD, 0x02, 0xCF, 0x04, 0xF8, - 0x23, 0xA6, 0x29, 0xB0, 0x26, 0xA9, 0x21, 0xA5 }; - u8 bw7mhz_b10_d9[] = { 0x12, 0xF8 }; - u8 bw7mhz_b17_38[] = { 0x00, 0x03 }; - u8 bw6mhz_b10_9f[] = { 0x17, 0xEA, 0xAA, 0xAA, 0xAA }; - u8 bw6mhz_b10_a6[] = { 0x27, 0xA7, 0x28, 0xB3, 0x02, 0xF0, - 0x01, 0xE8, 0x00, 0xCF, 0x00, 0xE6, 0x23, 0xA4 }; - u8 bw6mhz_b10_d9[] = { 0x1F, 0xDC }; - u8 bw6mhz_b17_38[] = { 0x00, 0x03 }; - u8 bw5mhz_b10_9f[] = { 0x1C, 0xB3, 0x33, 0x33, 0x33 }; - u8 bw5mhz_b10_a6[] = { 0x27, 0xA7, 0x28, 0xB3, 0x02, 0xF0, - 0x01, 0xE8, 0x00, 0xCF, 0x00, 0xE6, 0x23, 0xA4 }; - u8 bw5mhz_b10_d9[] = { 0x26, 0x3C }; - u8 bw5mhz_b17_38[] = { 0x00, 0x03 }; - u8 b10_b6[3]; - u8 d7val; + u8 data[MAX_WRITE_REGSIZE]; u32 iffreq; - u8 *b10_9f; - u8 *b10_a6; - u8 *b10_d9; - u8 *b17_38; + u8 nominalRate8bw[3][5] = { + /* TRCG Nominal Rate [37:0] */ + {0x11, 0xF0, 0x00, 0x00, 0x00}, /* 20.5MHz XTal */ + {0x15, 0x00, 0x00, 0x00, 0x00}, /* 24MHz XTal */ + {0x11, 0xF0, 0x00, 0x00, 0x00} /* 41MHz XTal */ + }; + u8 nominalRate7bw[3][5] = { + /* TRCG Nominal Rate [37:0] */ + {0x14, 0x80, 0x00, 0x00, 0x00}, /* 20.5MHz XTal */ + {0x18, 0x00, 0x00, 0x00, 0x00}, /* 24MHz XTal */ + {0x14, 0x80, 0x00, 0x00, 0x00} /* 41MHz XTal */ + }; + u8 nominalRate6bw[3][5] = { + /* TRCG Nominal Rate [37:0] */ + {0x17, 0xEA, 0xAA, 0xAA, 0xAA}, /* 20.5MHz XTal */ + {0x1C, 0x00, 0x00, 0x00, 0x00}, /* 24MHz XTal */ + {0x17, 0xEA, 0xAA, 0xAA, 0xAA} /* 41MHz XTal */ + }; + u8 nominalRate5bw[3][5] = { + /* TRCG Nominal Rate [37:0] */ + {0x1C, 0xB3, 0x33, 0x33, 0x33}, /* 20.5MHz XTal */ + {0x21, 0x99, 0x99, 0x99, 0x99}, /* 24MHz XTal */ + {0x1C, 0xB3, 0x33, 0x33, 0x33} /* 41MHz XTal */ + }; - dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + u8 itbCoef8bw[3][14] = { + {0x26, 0xAF, 0x06, 0xCD, 0x13, 0xBB, 0x28, 0xBA, 0x23, 0xA9, + 0x1F, 0xA8, 0x2C, 0xC8}, /* 20.5MHz XTal */ + {0x2F, 0xBA, 0x28, 0x9B, 0x28, 0x9D, 0x28, 0xA1, 0x29, 0xA5, + 0x2A, 0xAC, 0x29, 0xB5}, /* 24MHz XTal */ + {0x26, 0xAF, 0x06, 0xCD, 0x13, 0xBB, 0x28, 0xBA, 0x23, 0xA9, + 0x1F, 0xA8, 0x2C, 0xC8} /* 41MHz XTal */ + }; + u8 itbCoef7bw[3][14] = { + {0x2C, 0xBD, 0x02, 0xCF, 0x04, 0xF8, 0x23, 0xA6, 0x29, 0xB0, + 0x26, 0xA9, 0x21, 0xA5}, /* 20.5MHz XTal */ + {0x30, 0xB1, 0x29, 0x9A, 0x28, 0x9C, 0x28, 0xA0, 0x29, 0xA2, + 0x2B, 0xA6, 0x2B, 0xAD}, /* 24MHz XTal */ + {0x2C, 0xBD, 0x02, 0xCF, 0x04, 0xF8, 0x23, 0xA6, 0x29, 0xB0, + 0x26, 0xA9, 0x21, 0xA5} /* 41MHz XTal */ + }; + u8 itbCoef6bw[3][14] = { + {0x27, 0xA7, 0x28, 0xB3, 0x02, 0xF0, 0x01, 0xE8, 0x00, 0xCF, + 0x00, 0xE6, 0x23, 0xA4}, /* 20.5MHz XTal */ + {0x31, 0xA8, 0x29, 0x9B, 0x27, 0x9C, 0x28, 0x9E, 0x29, 0xA4, + 0x29, 0xA2, 0x29, 0xA8}, /* 24MHz XTal */ + {0x27, 0xA7, 0x28, 0xB3, 
0x02, 0xF0, 0x01, 0xE8, 0x00, 0xCF, + 0x00, 0xE6, 0x23, 0xA4} /* 41MHz XTal */ + }; + u8 itbCoef5bw[3][14] = { + {0x27, 0xA7, 0x28, 0xB3, 0x02, 0xF0, 0x01, 0xE8, 0x00, 0xCF, + 0x00, 0xE6, 0x23, 0xA4}, /* 20.5MHz XTal */ + {0x31, 0xA8, 0x29, 0x9B, 0x27, 0x9C, 0x28, 0x9E, 0x29, 0xA4, + 0x29, 0xA2, 0x29, 0xA8}, /* 24MHz XTal */ + {0x27, 0xA7, 0x28, 0xB3, 0x02, 0xF0, 0x01, 0xE8, 0x00, 0xCF, + 0x00, 0xE6, 0x23, 0xA4} /* 41MHz XTal */ + }; + + /* Set SLV-T Bank : 0x13 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x13); /* Echo performance optimization setting */ - cxd2841er_write_regs(priv, I2C_SLVT, 0x9c, b13_9c, sizeof(b13_9c)); + data[0] = 0x01; + data[1] = 0x14; + cxd2841er_write_regs(priv, I2C_SLVT, 0x9C, data, 2); + + /* Set SLV-T Bank : 0x10 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); switch (bandwidth) { case 8000000: - b10_9f = bw8mhz_b10_9f; - b10_a6 = bw8mhz_b10_a6; - b10_d9 = bw8mhz_b10_d9; - b17_38 = bw8mhz_b17_38; - d7val = 0; - iffreq = MAKE_IFFREQ_CONFIG(4.80); + /* TRCG Nominal Rate */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0x9F, nominalRate8bw[priv->xtal], 5); + /* Group delay equaliser settings for + * ASCOT2D, ASCOT2E and ASCOT3 tuners + */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0xA6, itbCoef8bw[priv->xtal], 14); + /* IF freq setting */ + iffreq = MAKE_IFFREQ_CONFIG_XTAL(priv->xtal, 4.80); + data[0] = (u8) ((iffreq >> 16) & 0xff); + data[1] = (u8)((iffreq >> 8) & 0xff); + data[2] = (u8)(iffreq & 0xff); + cxd2841er_write_regs(priv, I2C_SLVT, 0xB6, data, 3); + /* System bandwidth setting */ + cxd2841er_set_reg_bits( + priv, I2C_SLVT, 0xD7, 0x00, 0x07); + + /* Demod core latency setting */ + if (priv->xtal == SONY_XTAL_24000) { + data[0] = 0x15; + data[1] = 0x28; + } else { + data[0] = 0x01; + data[1] = 0xE0; + } + cxd2841er_write_regs(priv, I2C_SLVT, 0xD9, data, 2); + + /* Notch filter setting */ + data[0] = 0x01; + data[1] = 0x02; + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x17); + cxd2841er_write_regs(priv, I2C_SLVT, 0x38, data, 2); break; case 7000000: - b10_9f = bw7mhz_b10_9f; - b10_a6 = bw7mhz_b10_a6; - b10_d9 = bw7mhz_b10_d9; - b17_38 = bw7mhz_b17_38; - d7val = 2; - iffreq = MAKE_IFFREQ_CONFIG(4.20); + /* TRCG Nominal Rate */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0x9F, nominalRate7bw[priv->xtal], 5); + /* Group delay equaliser settings for + * ASCOT2D, ASCOT2E and ASCOT3 tuners + */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0xA6, itbCoef7bw[priv->xtal], 14); + /* IF freq setting */ + iffreq = MAKE_IFFREQ_CONFIG_XTAL(priv->xtal, 4.20); + data[0] = (u8) ((iffreq >> 16) & 0xff); + data[1] = (u8)((iffreq >> 8) & 0xff); + data[2] = (u8)(iffreq & 0xff); + cxd2841er_write_regs(priv, I2C_SLVT, 0xB6, data, 3); + /* System bandwidth setting */ + cxd2841er_set_reg_bits( + priv, I2C_SLVT, 0xD7, 0x02, 0x07); + + /* Demod core latency setting */ + if (priv->xtal == SONY_XTAL_24000) { + data[0] = 0x1F; + data[1] = 0xF8; + } else { + data[0] = 0x12; + data[1] = 0xF8; + } + cxd2841er_write_regs(priv, I2C_SLVT, 0xD9, data, 2); + + /* Notch filter setting */ + data[0] = 0x00; + data[1] = 0x03; + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x17); + cxd2841er_write_regs(priv, I2C_SLVT, 0x38, data, 2); break; case 6000000: - b10_9f = bw6mhz_b10_9f; - b10_a6 = bw6mhz_b10_a6; - b10_d9 = bw6mhz_b10_d9; - b17_38 = bw6mhz_b17_38; - d7val = 4; - iffreq = MAKE_IFFREQ_CONFIG(3.60); + /* TRCG Nominal Rate */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0x9F, nominalRate6bw[priv->xtal], 5); + /* Group delay equaliser settings for + * ASCOT2D, ASCOT2E and ASCOT3 tuners + */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0xA6, itbCoef6bw[priv->xtal], 14); + /* IF freq setting */ + iffreq = MAKE_IFFREQ_CONFIG_XTAL(priv->xtal, 3.60); + data[0] = (u8) ((iffreq >> 16) & 0xff); + data[1] = (u8)((iffreq >> 8) & 0xff); + data[2] = (u8)(iffreq & 0xff); + cxd2841er_write_regs(priv, I2C_SLVT, 0xB6, data, 3); + /* System bandwidth setting */ + cxd2841er_set_reg_bits( + priv, I2C_SLVT, 0xD7, 0x04, 0x07); + + /* Demod core latency setting */ + if (priv->xtal == SONY_XTAL_24000) { + data[0] = 0x25; + data[1] = 0x4C; + } else { + data[0] = 0x1F; + data[1] = 0xDC; + } + cxd2841er_write_regs(priv, I2C_SLVT, 0xD9, data, 2); + + /* Notch filter setting */ + data[0] = 0x00; + data[1] = 0x03; + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x17); + cxd2841er_write_regs(priv, I2C_SLVT, 0x38, data, 2); break; case 5000000: - b10_9f = bw5mhz_b10_9f; - b10_a6 = bw5mhz_b10_a6; - b10_d9 = bw5mhz_b10_d9; - b17_38 = bw5mhz_b17_38; - d7val = 6; - iffreq = MAKE_IFFREQ_CONFIG(3.60); + /* TRCG Nominal Rate */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0x9F, nominalRate5bw[priv->xtal], 5); + /* Group delay equaliser settings for + * ASCOT2D, ASCOT2E and ASCOT3 tuners + */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0xA6, itbCoef5bw[priv->xtal], 14); + /* IF freq setting */ + iffreq = MAKE_IFFREQ_CONFIG_XTAL(priv->xtal, 3.60); + data[0] = (u8) ((iffreq >> 16) & 0xff); + data[1] = (u8)((iffreq >> 8) & 0xff); + data[2] = (u8)(iffreq & 0xff); + cxd2841er_write_regs(priv, I2C_SLVT, 0xB6, data, 3); + /* System bandwidth setting */ + cxd2841er_set_reg_bits( + priv, I2C_SLVT, 0xD7, 0x06, 0x07); + + /* Demod core latency setting */ + if (priv->xtal == SONY_XTAL_24000) { + data[0] = 0x2C; + data[1] = 0xC2; + } else { + data[0] = 0x26; + data[1] = 0x3C; + } + cxd2841er_write_regs(priv, I2C_SLVT, 0xD9, data, 2); + + /* Notch filter setting */ + data[0] = 0x00; + data[1] = 0x03; + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x17); + cxd2841er_write_regs(priv, I2C_SLVT, 0x38, data, 2); + break; + } + + return 0; +} + +static int cxd2841er_sleep_tc_to_active_i_band( + struct cxd2841er_priv *priv, u32 bandwidth) +{ + u32 iffreq; + u8 data[3]; + + /* TRCG Nominal Rate */ + u8 nominalRate8bw[3][5] = { + {0x00, 0x00, 0x00, 0x00, 0x00}, /* 20.5MHz XTal */ + {0x11, 0xB8, 0x00, 0x00, 0x00}, /* 24MHz XTal */ + {0x00, 0x00, 0x00, 0x00, 0x00} /* 41MHz XTal */ + }; + + u8 nominalRate7bw[3][5] = { + {0x00, 0x00, 0x00, 0x00, 0x00}, /* 20.5MHz XTal */ + {0x14, 0x40, 0x00, 0x00, 0x00}, /* 24MHz XTal */ + {0x00, 0x00, 0x00, 0x00, 0x00} /* 41MHz XTal */ + }; + + u8 nominalRate6bw[3][5] = { + {0x14, 0x2E, 0x00, 0x00, 0x00}, /* 20.5MHz XTal */ + {0x17, 0xA0, 0x00, 0x00, 0x00}, /* 24MHz XTal */ + {0x14, 0x2E, 0x00, 0x00, 0x00} /* 41MHz XTal */ + }; + + u8 itbCoef8bw[3][14] = { + {0x00}, /* 20.5MHz XTal */ + {0x2F, 0xBA, 0x28, 0x9B, 0x28, 0x9D, 0x28, 0xA1, 0x29, + 0xA5, 0x2A, 0xAC, 0x29, 0xB5}, /* 24MHz Xtal */ + {0x0}, /* 41MHz XTal */ + }; + + u8 itbCoef7bw[3][14] = { + {0x00}, /* 20.5MHz XTal */ + {0x30, 0xB1, 0x29, 0x9A, 0x28, 0x9C, 0x28, 0xA0, 0x29, + 0xA2, 0x2B, 0xA6, 0x2B, 0xAD}, /* 24MHz Xtal */ + {0x00}, /* 41MHz XTal */ + }; + + u8 itbCoef6bw[3][14] = { + {0x27, 0xA7, 0x28, 0xB3, 0x02, 0xF0, 0x01, 0xE8, 0x00, + 0xCF, 0x00, 0xE6, 0x23, 0xA4}, /* 20.5MHz XTal */ + {0x31, 0xA8, 0x29, 0x9B, 0x27, 0x9C, 0x28, 0x9E, 0x29, + 0xA4, 0x29, 0xA2, 0x29, 0xA8}, /* 24MHz Xtal */ + {0x27, 0xA7, 0x28, 0xB3, 0x02, 0xF0, 0x01, 0xE8, 0x00, + 0xCF, 0x00, 0xE6, 0x23, 0xA4}, /* 41MHz XTal */ + }; + + dev_dbg(&priv->i2c->dev, "%s() bandwidth=%u\n", __func__, bandwidth); + /* Set SLV-T Bank : 0x10 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); + + /* 20.5/41MHz Xtal support is not available + *
on ISDB-T 7MHzBW and 8MHzBW + */ + if (priv->xtal != SONY_XTAL_24000 && bandwidth > 6000000) { + dev_err(&priv->i2c->dev, + "%s(): bandwidth %d supported only for 24MHz xtal\n", + __func__, bandwidth); + return -EINVAL; + } + + switch (bandwidth) { + case 8000000: + /* TRCG Nominal Rate */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0x9F, nominalRate8bw[priv->xtal], 5); + /* Group delay equaliser settings for ASCOT tuners optimized */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0xA6, itbCoef8bw[priv->xtal], 14); + + /* IF freq setting */ + iffreq = MAKE_IFFREQ_CONFIG_XTAL(priv->xtal, 4.75); + data[0] = (u8) ((iffreq >> 16) & 0xff); + data[1] = (u8)((iffreq >> 8) & 0xff); + data[2] = (u8)(iffreq & 0xff); + cxd2841er_write_regs(priv, I2C_SLVT, 0xB6, data, 3); + + /* System bandwidth setting */ + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xd7, 0x0, 0x7); + + /* Demod core latency setting */ + data[0] = 0x13; + data[1] = 0xFC; + cxd2841er_write_regs(priv, I2C_SLVT, 0xD9, data, 2); + + /* Acquisition optimization setting */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x12); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0x71, 0x03, 0x07); + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x15); + cxd2841er_write_reg(priv, I2C_SLVT, 0xBE, 0x03); + break; + case 7000000: + /* TRCG Nominal Rate */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0x9F, nominalRate7bw[priv->xtal], 5); + /* Group delay equaliser settings for ASCOT tuners optimized */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0xA6, itbCoef7bw[priv->xtal], 14); + + /* IF freq setting */ + iffreq = MAKE_IFFREQ_CONFIG_XTAL(priv->xtal, 4.15); + data[0] = (u8) ((iffreq >> 16) & 0xff); + data[1] = (u8)((iffreq >> 8) & 0xff); + data[2] = (u8)(iffreq & 0xff); + cxd2841er_write_regs(priv, I2C_SLVT, 0xB6, data, 3); + + /* System bandwidth setting */ + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xd7, 0x02, 0x7); + + /* Demod core latency setting */ + data[0] = 0x1A; + data[1] = 0xFA; + cxd2841er_write_regs(priv, I2C_SLVT, 0xD9, data, 2); + + /* Acquisition optimization setting */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x12); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0x71, 0x03, 0x07); + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x15); + cxd2841er_write_reg(priv, I2C_SLVT, 0xBE, 0x02); + break; + case 6000000: + /* TRCG Nominal Rate */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0x9F, nominalRate6bw[priv->xtal], 5); + /* Group delay equaliser settings for ASCOT tuners optimized */ + cxd2841er_write_regs(priv, I2C_SLVT, + 0xA6, itbCoef6bw[priv->xtal], 14); + + /* IF freq setting */ + iffreq = MAKE_IFFREQ_CONFIG_XTAL(priv->xtal, 3.55); + data[0] = (u8) ((iffreq >> 16) & 0xff); + data[1] = (u8)((iffreq >> 8) & 0xff); + data[2] = (u8)(iffreq & 0xff); + cxd2841er_write_regs(priv, I2C_SLVT, 0xB6, data, 3); + + /* System bandwidth setting */ + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xd7, 0x04, 0x7); + + /* Demod core latency setting */ + if (priv->xtal == SONY_XTAL_24000) { + data[0] = 0x1F; + data[1] = 0x79; + } else { + data[0] = 0x1A; + data[1] = 0xE2; + } + cxd2841er_write_regs(priv, I2C_SLVT, 0xD9, data, 2); + + /* Acquisition optimization setting */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x12); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0x71, 0x07, 0x07); + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x15); + cxd2841er_write_reg(priv, I2C_SLVT, 0xBE, 0x02); break; default: dev_dbg(&priv->i2c->dev, "%s(): invalid bandwidth %d\n", - __func__, bandwidth); + __func__, bandwidth); return -EINVAL; } - /* */ - b10_b6[0] = (u8) ((iffreq >> 16) & 0xff); - b10_b6[1] = (u8)((iffreq >> 8) & 
0xff); - b10_b6[2] = (u8)(iffreq & 0xff); - cxd2841er_write_regs( - priv, I2C_SLVT, 0x9f, b10_9f, sizeof(bw8mhz_b10_9f)); - cxd2841er_write_regs( - priv, I2C_SLVT, 0xa6, b10_a6, sizeof(bw8mhz_b10_a6)); - cxd2841er_write_regs(priv, I2C_SLVT, 0xb6, b10_b6, sizeof(b10_b6)); - cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xd7, d7val, 0x7); - cxd2841er_write_regs( - priv, I2C_SLVT, 0xd9, b10_d9, sizeof(bw8mhz_b10_d9)); - cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x17); - cxd2841er_write_regs( - priv, I2C_SLVT, 0x38, b17_38, sizeof(bw8mhz_b17_38)); return 0; } @@ -1837,7 +2694,7 @@ static int cxd2841er_sleep_tc_to_active_c_band(struct cxd2841er_priv *priv, u8 b10_b6[3]; u32 iffreq; - dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + dev_dbg(&priv->i2c->dev, "%s() bw=%d\n", __func__, bandwidth); cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); switch (bandwidth) { case 8000000: @@ -1854,7 +2711,7 @@ static int cxd2841er_sleep_tc_to_active_c_band(struct cxd2841er_priv *priv, iffreq = MAKE_IFFREQ_CONFIG(3.7); break; default: - dev_dbg(&priv->i2c->dev, "%s(): unsupported bandwidth %d\n", + dev_err(&priv->i2c->dev, "%s(): unsupported bandwidth %d\n", __func__, bandwidth); return -EINVAL; } @@ -1902,6 +2759,7 @@ static int cxd2841er_sleep_tc_to_active_t(struct cxd2841er_priv *priv, u32 bandwidth) { u8 data[2] = { 0x09, 0x54 }; + u8 data24m[3] = {0xDC, 0x6C, 0x00}; dev_dbg(&priv->i2c->dev, "%s()\n", __func__); cxd2841er_set_ts_clock_mode(priv, SYS_DVBT); @@ -1919,7 +2777,11 @@ static int cxd2841er_sleep_tc_to_active_t(struct cxd2841er_priv *priv, cxd2841er_write_reg(priv, I2C_SLVT, 0x30, 0x00); /* Enable ADC 1 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x41, 0x1a); - /* xtal freq 20.5MHz */ + /* Enable ADC 2 & 3 */ + if (priv->xtal == SONY_XTAL_41000) { + data[0] = 0x0A; + data[1] = 0xD4; + } cxd2841er_write_regs(priv, I2C_SLVT, 0x43, data, 2); /* Enable ADC 4 */ cxd2841er_write_reg(priv, I2C_SLVX, 0x18, 0x00); @@ -1947,6 +2809,15 @@ static int cxd2841er_sleep_tc_to_active_t(struct cxd2841er_priv *priv, /* TSIF setting */ cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xce, 0x01, 0x01); cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xcf, 0x01, 0x01); + + if (priv->xtal == SONY_XTAL_24000) { + /* Set SLV-T Bank : 0x10 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); + cxd2841er_write_reg(priv, I2C_SLVT, 0xBF, 0x60); + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x18); + cxd2841er_write_regs(priv, I2C_SLVT, 0x24, data24m, 3); + } + cxd2841er_sleep_tc_to_active_t_band(priv, bandwidth); /* Set SLV-T Bank : 0x00 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x00); @@ -1961,7 +2832,7 @@ static int cxd2841er_sleep_tc_to_active_t(struct cxd2841er_priv *priv, static int cxd2841er_sleep_tc_to_active_t2(struct cxd2841er_priv *priv, u32 bandwidth) { - u8 data[2] = { 0x09, 0x54 }; + u8 data[MAX_WRITE_REGSIZE]; dev_dbg(&priv->i2c->dev, "%s()\n", __func__); cxd2841er_set_ts_clock_mode(priv, SYS_DVBT2); @@ -1974,12 +2845,21 @@ static int cxd2841er_sleep_tc_to_active_t2(struct cxd2841er_priv *priv, /* Enable demod clock */ cxd2841er_write_reg(priv, I2C_SLVT, 0x2c, 0x01); /* Disable RF level monitor */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x59, 0x00); cxd2841er_write_reg(priv, I2C_SLVT, 0x2f, 0x00); /* Enable ADC clock */ cxd2841er_write_reg(priv, I2C_SLVT, 0x30, 0x00); /* Enable ADC 1 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x41, 0x1a); - /* xtal freq 20.5MHz */ + + if (priv->xtal == SONY_XTAL_41000) { + data[0] = 0x0A; + data[1] = 0xD4; + } else { + data[0] = 0x09; + data[1] = 0x54; + } + cxd2841er_write_regs(priv, I2C_SLVT, 0x43, data, 2); 
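+	/* As in the DVB-T path above, the two bytes written at 0x43 enable ADC 2 & 3 with xtal-dependent values: 0x09,0x54 for the 20.5MHz and 24MHz crystals, 0x0A,0xD4 for 41MHz. */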
/* Enable ADC 4 */ cxd2841er_write_reg(priv, I2C_SLVX, 0x18, 0x00); @@ -2002,6 +2882,10 @@ static int cxd2841er_sleep_tc_to_active_t2(struct cxd2841er_priv *priv, /* Set SLV-T Bank : 0x2b */ cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x2b); cxd2841er_set_reg_bits(priv, I2C_SLVT, 0x76, 0x20, 0x70); + /* Set SLV-T Bank : 0x23 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x23); + /* L1 Control setting */ + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xE6, 0x00, 0x03); /* Set SLV-T Bank : 0x00 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x00); /* TSIF setting */ @@ -2020,6 +2904,72 @@ static int cxd2841er_sleep_tc_to_active_t2(struct cxd2841er_priv *priv, cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x2b); cxd2841er_set_reg_bits(priv, I2C_SLVT, 0x11, 0x20, 0x3f); + /* 24MHz Xtal setting */ + if (priv->xtal == SONY_XTAL_24000) { + /* Set SLV-T Bank : 0x11 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x11); + data[0] = 0xEB; + data[1] = 0x03; + data[2] = 0x3B; + cxd2841er_write_regs(priv, I2C_SLVT, 0x33, data, 3); + + /* Set SLV-T Bank : 0x20 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x20); + data[0] = 0x5E; + data[1] = 0x5E; + data[2] = 0x47; + cxd2841er_write_regs(priv, I2C_SLVT, 0x95, data, 3); + + cxd2841er_write_reg(priv, I2C_SLVT, 0x99, 0x18); + + data[0] = 0x3F; + data[1] = 0xFF; + cxd2841er_write_regs(priv, I2C_SLVT, 0xD9, data, 2); + + /* Set SLV-T Bank : 0x24 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x24); + data[0] = 0x0B; + data[1] = 0x72; + cxd2841er_write_regs(priv, I2C_SLVT, 0x34, data, 2); + + data[0] = 0x93; + data[1] = 0xF3; + data[2] = 0x00; + cxd2841er_write_regs(priv, I2C_SLVT, 0xD2, data, 3); + + data[0] = 0x05; + data[1] = 0xB8; + data[2] = 0xD8; + cxd2841er_write_regs(priv, I2C_SLVT, 0xDD, data, 3); + + cxd2841er_write_reg(priv, I2C_SLVT, 0xE0, 0x00); + + /* Set SLV-T Bank : 0x25 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x25); + cxd2841er_write_reg(priv, I2C_SLVT, 0xED, 0x60); + + /* Set SLV-T Bank : 0x27 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x27); + cxd2841er_write_reg(priv, I2C_SLVT, 0xFA, 0x34); + + /* Set SLV-T Bank : 0x2B */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x2B); + cxd2841er_write_reg(priv, I2C_SLVT, 0x4B, 0x2F); + cxd2841er_write_reg(priv, I2C_SLVT, 0x9E, 0x0E); + + /* Set SLV-T Bank : 0x2D */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x2D); + data[0] = 0x89; + data[1] = 0x89; + cxd2841er_write_regs(priv, I2C_SLVT, 0x24, data, 2); + + /* Set SLV-T Bank : 0x5E */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x5E); + data[0] = 0x24; + data[1] = 0x95; + cxd2841er_write_regs(priv, I2C_SLVT, 0x8C, data, 2); + } + cxd2841er_sleep_tc_to_active_t2_band(priv, bandwidth); /* Set SLV-T Bank : 0x00 */ @@ -2032,6 +2982,84 @@ static int cxd2841er_sleep_tc_to_active_t2(struct cxd2841er_priv *priv, return 0; } +/* ISDB-Tb part */ +static int cxd2841er_sleep_tc_to_active_i(struct cxd2841er_priv *priv, + u32 bandwidth) +{ + u8 data[2] = { 0x09, 0x54 }; + u8 data24m[2] = {0x60, 0x00}; + u8 data24m2[3] = {0xB7, 0x1B, 0x00}; + + dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + cxd2841er_set_ts_clock_mode(priv, SYS_DVBT); + /* Set SLV-X Bank : 0x00 */ + cxd2841er_write_reg(priv, I2C_SLVX, 0x00, 0x00); + /* Set demod mode */ + cxd2841er_write_reg(priv, I2C_SLVX, 0x17, 0x06); + /* Set SLV-T Bank : 0x00 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x00); + /* Enable demod clock */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x2c, 0x01); + /* Enable RF level monitor */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x2f, 0x01); + cxd2841er_write_reg(priv, I2C_SLVT, 
0x59, 0x01); + /* Enable ADC clock */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x30, 0x00); + /* Enable ADC 1 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x41, 0x1a); + /* xtal freq 20.5MHz or 24M */ + cxd2841er_write_regs(priv, I2C_SLVT, 0x43, data, 2); + /* Enable ADC 4 */ + cxd2841er_write_reg(priv, I2C_SLVX, 0x18, 0x00); + /* ASCOT setting ON */ + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xa5, 0x01, 0x01); + /* FEC Auto Recovery setting */ + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0x30, 0x01, 0x01); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0x31, 0x00, 0x01); + /* ISDB-T initial setting */ + /* Set SLV-T Bank : 0x00 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x00); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xce, 0x00, 0x01); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xcf, 0x00, 0x01); + /* Set SLV-T Bank : 0x10 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0x69, 0x04, 0x07); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0x6B, 0x03, 0x07); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0x9D, 0x50, 0xFF); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xD3, 0x06, 0x1F); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xED, 0x00, 0x01); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xE2, 0xCE, 0x80); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xF2, 0x13, 0x10); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xDE, 0x2E, 0x3F); + /* Set SLV-T Bank : 0x15 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x15); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xDE, 0x02, 0x03); + /* Set SLV-T Bank : 0x1E */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x1E); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0x73, 0x68, 0xFF); + /* Set SLV-T Bank : 0x63 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x63); + cxd2841er_set_reg_bits(priv, I2C_SLVT, 0x81, 0x00, 0x01); + + /* for xtal 24MHz */ + /* Set SLV-T Bank : 0x10 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); + cxd2841er_write_regs(priv, I2C_SLVT, 0xBF, data24m, 2); + /* Set SLV-T Bank : 0x60 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x60); + cxd2841er_write_regs(priv, I2C_SLVT, 0xA8, data24m2, 3); + + cxd2841er_sleep_tc_to_active_i_band(priv, bandwidth); + /* Set SLV-T Bank : 0x00 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x00); + /* Disable HiZ Setting 1 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x80, 0x28); + /* Disable HiZ Setting 2 */ + cxd2841er_write_reg(priv, I2C_SLVT, 0x81, 0x00); + priv->state = STATE_ACTIVE_TC; + return 0; +} + static int cxd2841er_sleep_tc_to_active_c(struct cxd2841er_priv *priv, u32 bandwidth) { @@ -2079,7 +3107,7 @@ static int cxd2841er_sleep_tc_to_active_c(struct cxd2841er_priv *priv, cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xce, 0x01, 0x01); cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xcf, 0x01, 0x01); - cxd2841er_sleep_tc_to_active_c_band(priv, 8000000); + cxd2841er_sleep_tc_to_active_c_band(priv, bandwidth); /* Set SLV-T Bank : 0x00 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x00); /* Disable HiZ Setting 1 */ @@ -2094,8 +3122,6 @@ static int cxd2841er_get_frontend(struct dvb_frontend *fe, struct dtv_frontend_properties *p) { enum fe_status status = 0; - u16 strength = 0, snr = 0; - u32 errors = 0, ber = 0; struct cxd2841er_priv *priv = fe->demodulator_priv; dev_dbg(&priv->i2c->dev, "%s()\n", __func__); @@ -2104,32 +3130,18 @@ static int cxd2841er_get_frontend(struct dvb_frontend *fe, else if (priv->state == STATE_ACTIVE_TC) cxd2841er_read_status_tc(fe, &status); + cxd2841er_read_signal_strength(fe); + if (status & FE_HAS_LOCK) { - cxd2841er_read_signal_strength(fe, &strength); - p->strength.len = 1; - 
p->strength.stat[0].scale = FE_SCALE_RELATIVE; - p->strength.stat[0].uvalue = strength; - cxd2841er_read_snr(fe, &snr); - p->cnr.len = 1; - p->cnr.stat[0].scale = FE_SCALE_DECIBEL; - p->cnr.stat[0].svalue = snr; - cxd2841er_read_ucblocks(fe, &errors); - p->block_error.len = 1; - p->block_error.stat[0].scale = FE_SCALE_COUNTER; - p->block_error.stat[0].uvalue = errors; - cxd2841er_read_ber(fe, &ber); - p->post_bit_error.len = 1; - p->post_bit_error.stat[0].scale = FE_SCALE_COUNTER; - p->post_bit_error.stat[0].uvalue = ber; + cxd2841er_read_snr(fe); + cxd2841er_read_ucblocks(fe); + + cxd2841er_read_ber(fe); } else { - p->strength.len = 1; - p->strength.stat[0].scale = FE_SCALE_NOT_AVAILABLE; - p->cnr.len = 1; p->cnr.stat[0].scale = FE_SCALE_NOT_AVAILABLE; - p->block_error.len = 1; p->block_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; - p->post_bit_error.len = 1; p->post_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + p->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; } return 0; } @@ -2142,10 +3154,10 @@ static int cxd2841er_set_frontend_s(struct dvb_frontend *fe) struct dtv_frontend_properties *p = &fe->dtv_property_cache; u32 symbol_rate = p->symbol_rate/1000; - dev_dbg(&priv->i2c->dev, "%s(): %s frequency=%d symbol_rate=%d\n", + dev_dbg(&priv->i2c->dev, "%s(): %s frequency=%d symbol_rate=%d xtal=%d\n", __func__, (p->delivery_system == SYS_DVBS ? "DVB-S" : "DVB-S2"), - p->frequency, symbol_rate); + p->frequency, symbol_rate, priv->xtal); switch (priv->state) { case STATE_SLEEP_S: ret = cxd2841er_sleep_s_to_active_s( @@ -2189,6 +3201,13 @@ static int cxd2841er_set_frontend_s(struct dvb_frontend *fe) __func__, carr_offset); } done: + /* Reset stats */ + p->strength.stat[0].scale = FE_SCALE_RELATIVE; + p->cnr.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + p->block_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + p->post_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + p->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + return ret; } @@ -2199,7 +3218,8 @@ static int cxd2841er_set_frontend_tc(struct dvb_frontend *fe) struct cxd2841er_priv *priv = fe->demodulator_priv; struct dtv_frontend_properties *p = &fe->dtv_property_cache; - dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + dev_dbg(&priv->i2c->dev, "%s() delivery_system=%d bandwidth_hz=%d\n", + __func__, p->delivery_system, p->bandwidth_hz); if (p->delivery_system == SYS_DVBT) { priv->system = SYS_DVBT; switch (priv->state) { @@ -2233,9 +3253,33 @@ static int cxd2841er_set_frontend_tc(struct dvb_frontend *fe) __func__, priv->state); ret = -EINVAL; } + } else if (p->delivery_system == SYS_ISDBT) { + priv->system = SYS_ISDBT; + switch (priv->state) { + case STATE_SLEEP_TC: + ret = cxd2841er_sleep_tc_to_active_i( + priv, p->bandwidth_hz); + break; + case STATE_ACTIVE_TC: + ret = cxd2841er_retune_active(priv, p); + break; + default: + dev_dbg(&priv->i2c->dev, "%s(): invalid state %d\n", + __func__, priv->state); + ret = -EINVAL; + } } else if (p->delivery_system == SYS_DVBC_ANNEX_A || p->delivery_system == SYS_DVBC_ANNEX_C) { priv->system = SYS_DVBC_ANNEX_A; + /* correct bandwidth */ + if (p->bandwidth_hz != 6000000 && + p->bandwidth_hz != 7000000 && + p->bandwidth_hz != 8000000) { + p->bandwidth_hz = 8000000; + dev_dbg(&priv->i2c->dev, "%s(): forcing bandwidth to %d\n", + __func__, p->bandwidth_hz); + } + switch (priv->state) { case STATE_SLEEP_TC: ret = cxd2841er_sleep_tc_to_active_c( @@ -2321,7 +3365,8 @@ static int cxd2841er_tune_tc(struct dvb_frontend *fe, struct cxd2841er_priv *priv = fe->demodulator_priv; struct 
dtv_frontend_properties *p = &fe->dtv_property_cache; - dev_dbg(&priv->i2c->dev, "%s(): re_tune %d\n", __func__, re_tune); + dev_dbg(&priv->i2c->dev, "%s(): re_tune %d bandwidth=%d\n", __func__, + re_tune, p->bandwidth_hz); if (re_tune) { ret = cxd2841er_set_frontend_tc(fe); if (ret) @@ -2329,7 +3374,16 @@ static int cxd2841er_tune_tc(struct dvb_frontend *fe, cxd2841er_read_status_tc(fe, status); if (*status & FE_HAS_LOCK) { switch (priv->system) { + case SYS_ISDBT: + ret = cxd2841er_get_carrier_offset_i( + priv, p->bandwidth_hz, + &carrier_offset); + break; case SYS_DVBT: + ret = cxd2841er_get_carrier_offset_t( + priv, p->bandwidth_hz, + &carrier_offset); + break; case SYS_DVBT2: ret = cxd2841er_get_carrier_offset_t2( priv, p->bandwidth_hz, @@ -2382,6 +3436,9 @@ static int cxd2841er_sleep_tc(struct dvb_frontend *fe) case SYS_DVBT2: cxd2841er_active_t2_to_sleep_tc(priv); break; + case SYS_ISDBT: + cxd2841er_active_i_to_sleep_tc(priv); + break; case SYS_DVBC_ANNEX_A: cxd2841er_active_c_to_sleep_tc(priv); break; @@ -2512,23 +3569,57 @@ static enum dvbfe_algo cxd2841er_get_algo(struct dvb_frontend *fe) return DVBFE_ALGO_HW; } +static void cxd2841er_init_stats(struct dvb_frontend *fe) +{ + struct dtv_frontend_properties *p = &fe->dtv_property_cache; + + p->strength.len = 1; + p->strength.stat[0].scale = FE_SCALE_RELATIVE; + p->cnr.len = 1; + p->cnr.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + p->block_error.len = 1; + p->block_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + p->post_bit_error.len = 1; + p->post_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + p->post_bit_count.len = 1; + p->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; +} + + static int cxd2841er_init_s(struct dvb_frontend *fe) { struct cxd2841er_priv *priv = fe->demodulator_priv; + /* sanity. 
force demod to SHUTDOWN state */ + if (priv->state == STATE_SLEEP_S) { + dev_dbg(&priv->i2c->dev, "%s() forcing sleep->shutdown\n", + __func__); + cxd2841er_sleep_s_to_shutdown(priv); + } else if (priv->state == STATE_ACTIVE_S) { + dev_dbg(&priv->i2c->dev, "%s() forcing active->sleep->shutdown\n", + __func__); + cxd2841er_active_s_to_sleep_s(priv); + cxd2841er_sleep_s_to_shutdown(priv); + } + dev_dbg(&priv->i2c->dev, "%s()\n", __func__); cxd2841er_shutdown_to_sleep_s(priv); /* SONY_DEMOD_CONFIG_SAT_IFAGCNEG set to 1 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0xa0); cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xb9, 0x01, 0x01); + + cxd2841er_init_stats(fe); + return 0; } static int cxd2841er_init_tc(struct dvb_frontend *fe) { struct cxd2841er_priv *priv = fe->demodulator_priv; + struct dtv_frontend_properties *p = &fe->dtv_property_cache; - dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + dev_dbg(&priv->i2c->dev, "%s() bandwidth_hz=%d\n", + __func__, p->bandwidth_hz); cxd2841er_shutdown_to_sleep_tc(priv); /* SONY_DEMOD_CONFIG_IFAGCNEG = 1 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x10); @@ -2538,12 +3629,14 @@ static int cxd2841er_init_tc(struct dvb_frontend *fe) /* SONY_DEMOD_CONFIG_PARALLEL_SEL = 1 */ cxd2841er_write_reg(priv, I2C_SLVT, 0x00, 0x00); cxd2841er_set_reg_bits(priv, I2C_SLVT, 0xc4, 0x00, 0x80); + + cxd2841er_init_stats(fe); + return 0; } static struct dvb_frontend_ops cxd2841er_dvbs_s2_ops; -static struct dvb_frontend_ops cxd2841er_dvbt_t2_ops; -static struct dvb_frontend_ops cxd2841er_dvbc_ops; +static struct dvb_frontend_ops cxd2841er_t_c_ops; static struct dvb_frontend *cxd2841er_attach(struct cxd2841er_config *cfg, struct i2c_adapter *i2c, @@ -2551,6 +3644,7 @@ static struct dvb_frontend *cxd2841er_attach(struct cxd2841er_config *cfg, { u8 chip_id = 0; const char *type; + const char *name; struct cxd2841er_priv *priv = NULL; /* allocate memory for the internal state */ @@ -2561,46 +3655,49 @@ static struct dvb_frontend *cxd2841er_attach(struct cxd2841er_config *cfg, priv->config = cfg; priv->i2c_addr_slvx = (cfg->i2c_addr + 4) >> 1; priv->i2c_addr_slvt = (cfg->i2c_addr) >> 1; - /* create dvb_frontend */ - switch (system) { - case SYS_DVBS: - memcpy(&priv->frontend.ops, - &cxd2841er_dvbs_s2_ops, - sizeof(struct dvb_frontend_ops)); - type = "S/S2"; - break; - case SYS_DVBT: - memcpy(&priv->frontend.ops, - &cxd2841er_dvbt_t2_ops, - sizeof(struct dvb_frontend_ops)); - type = "T/T2"; - break; - case SYS_DVBC_ANNEX_A: - memcpy(&priv->frontend.ops, - &cxd2841er_dvbc_ops, - sizeof(struct dvb_frontend_ops)); - type = "C/C2"; - break; - default: - kfree(priv); - return NULL; - } + priv->xtal = cfg->xtal; priv->frontend.demodulator_priv = priv; - dev_info(&priv->i2c->dev, - "%s(): attaching CXD2841ER DVB-%s frontend\n", - __func__, type); dev_info(&priv->i2c->dev, "%s(): I2C adapter %p SLVX addr %x SLVT addr %x\n", __func__, priv->i2c, priv->i2c_addr_slvx, priv->i2c_addr_slvt); chip_id = cxd2841er_chip_id(priv); - if (chip_id != CXD2841ER_CHIP_ID) { + switch (chip_id) { + case CXD2841ER_CHIP_ID: + snprintf(cxd2841er_t_c_ops.info.name, 128, + "Sony CXD2841ER DVB-T/T2/C demodulator"); + name = "CXD2841ER"; + break; + case CXD2854ER_CHIP_ID: + snprintf(cxd2841er_t_c_ops.info.name, 128, + "Sony CXD2854ER DVB-T/T2/C and ISDB-T demodulator"); + cxd2841er_t_c_ops.delsys[3] = SYS_ISDBT; + name = "CXD2854ER"; + break; + default: dev_err(&priv->i2c->dev, "%s(): invalid chip ID 0x%02x\n", - __func__, chip_id); + __func__, chip_id); priv->frontend.demodulator_priv = NULL; kfree(priv); return 
NULL; } + + /* create dvb_frontend */ + if (system == SYS_DVBS) { + memcpy(&priv->frontend.ops, + &cxd2841er_dvbs_s2_ops, + sizeof(struct dvb_frontend_ops)); + type = "S/S2"; + } else { + memcpy(&priv->frontend.ops, + &cxd2841er_t_c_ops, + sizeof(struct dvb_frontend_ops)); + type = "T/T2/C/ISDB-T"; + } + + dev_info(&priv->i2c->dev, + "%s(): attaching %s DVB-%s frontend\n", + __func__, name, type); dev_info(&priv->i2c->dev, "%s(): chip ID 0x%02x OK.\n", __func__, chip_id); return &priv->frontend; @@ -2613,19 +3710,12 @@ struct dvb_frontend *cxd2841er_attach_s(struct cxd2841er_config *cfg, } EXPORT_SYMBOL(cxd2841er_attach_s); -struct dvb_frontend *cxd2841er_attach_t(struct cxd2841er_config *cfg, - struct i2c_adapter *i2c) -{ - return cxd2841er_attach(cfg, i2c, SYS_DVBT); -} -EXPORT_SYMBOL(cxd2841er_attach_t); - -struct dvb_frontend *cxd2841er_attach_c(struct cxd2841er_config *cfg, +struct dvb_frontend *cxd2841er_attach_t_c(struct cxd2841er_config *cfg, struct i2c_adapter *i2c) { - return cxd2841er_attach(cfg, i2c, SYS_DVBC_ANNEX_A); + return cxd2841er_attach(cfg, i2c, 0); } -EXPORT_SYMBOL(cxd2841er_attach_c); +EXPORT_SYMBOL(cxd2841er_attach_t_c); static struct dvb_frontend_ops cxd2841er_dvbs_s2_ops = { .delsys = { SYS_DVBS, SYS_DVBS2 }, @@ -2655,10 +3745,10 @@ static struct dvb_frontend_ops cxd2841er_dvbs_s2_ops = { .tune = cxd2841er_tune_s }; -static struct dvb_frontend_ops cxd2841er_dvbt_t2_ops = { - .delsys = { SYS_DVBT, SYS_DVBT2 }, +static struct dvb_frontend_ops cxd2841er_t_c_ops = { + .delsys = { SYS_DVBT, SYS_DVBT2, SYS_DVBC_ANNEX_A }, .info = { - .name = "Sony CXD2841ER DVB-T/T2 demodulator", + .name = "", /* will set in attach function */ .caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 | @@ -2691,37 +3781,6 @@ static struct dvb_frontend_ops cxd2841er_dvbt_t2_ops = { .get_frontend_algo = cxd2841er_get_algo }; -static struct dvb_frontend_ops cxd2841er_dvbc_ops = { - .delsys = { SYS_DVBC_ANNEX_A }, - .info = { - .name = "Sony CXD2841ER DVB-C demodulator", - .caps = FE_CAN_FEC_1_2 | - FE_CAN_FEC_2_3 | - FE_CAN_FEC_3_4 | - FE_CAN_FEC_5_6 | - FE_CAN_FEC_7_8 | - FE_CAN_FEC_AUTO | - FE_CAN_QAM_16 | - FE_CAN_QAM_32 | - FE_CAN_QAM_64 | - FE_CAN_QAM_128 | - FE_CAN_QAM_256 | - FE_CAN_QAM_AUTO | - FE_CAN_INVERSION_AUTO, - .frequency_min = 42000000, - .frequency_max = 1002000000 - }, - .init = cxd2841er_init_tc, - .sleep = cxd2841er_sleep_tc, - .release = cxd2841er_release, - .set_frontend = cxd2841er_set_frontend_tc, - .get_frontend = cxd2841er_get_frontend, - .read_status = cxd2841er_read_status_tc, - .tune = cxd2841er_tune_tc, - .i2c_gate_ctrl = cxd2841er_i2c_gate_ctrl, - .get_frontend_algo = cxd2841er_get_algo, -}; - -MODULE_DESCRIPTION("Sony CXD2841ER DVB-C/C2/T/T2/S/S2 demodulator driver"); -MODULE_AUTHOR("Sergey Kozlov "); +MODULE_DESCRIPTION("Sony CXD2841ER/CXD2854ER DVB-C/C2/T/T2/S/S2 demodulator driver"); +MODULE_AUTHOR("Sergey Kozlov , Abylay Ospan "); MODULE_LICENSE("GPL"); diff --git a/drivers/media/dvb-frontends/cxd2841er.h b/drivers/media/dvb-frontends/cxd2841er.h index 3472bdd58949..62ad5f07390b 100644 --- a/drivers/media/dvb-frontends/cxd2841er.h +++ b/drivers/media/dvb-frontends/cxd2841er.h @@ -25,41 +25,39 @@ #include #include +enum cxd2841er_xtal { + SONY_XTAL_20500, /* 20.5 MHz */ + SONY_XTAL_24000, /* 24 MHz */ + SONY_XTAL_41000 /* 41 MHz */ +}; + struct cxd2841er_config { u8 i2c_addr; + enum cxd2841er_xtal xtal; }; #if IS_REACHABLE(CONFIG_DVB_CXD2841ER) extern struct dvb_frontend *cxd2841er_attach_s(struct cxd2841er_config *cfg, struct i2c_adapter *i2c); -extern 
struct dvb_frontend *cxd2841er_attach_t(struct cxd2841er_config *cfg, - struct i2c_adapter *i2c); - -extern struct dvb_frontend *cxd2841er_attach_c(struct cxd2841er_config *cfg, +extern struct dvb_frontend *cxd2841er_attach_t_c(struct cxd2841er_config *cfg, struct i2c_adapter *i2c); #else static inline struct dvb_frontend *cxd2841er_attach_s( struct cxd2841er_config *cfg, struct i2c_adapter *i2c) { - printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__); + pr_warn("%s: driver disabled by Kconfig\n", __func__); return NULL; } -static inline struct dvb_frontend *cxd2841er_attach_t( +static inline struct dvb_frontend *cxd2841er_attach_t_c( struct cxd2841er_config *cfg, struct i2c_adapter *i2c) { - printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__); + pr_warn("%s: driver disabled by Kconfig\n", __func__); return NULL; } -static inline struct dvb_frontend *cxd2841er_attach_c( - struct cxd2841er_config *cfg, struct i2c_adapter *i2c) -{ - printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__); - return NULL; -} #endif #endif diff --git a/drivers/media/dvb-frontends/cxd2841er_priv.h b/drivers/media/dvb-frontends/cxd2841er_priv.h index 33e2f495277b..0bbce451149f 100644 --- a/drivers/media/dvb-frontends/cxd2841er_priv.h +++ b/drivers/media/dvb-frontends/cxd2841er_priv.h @@ -26,6 +26,7 @@ #define I2C_SLVT 1 #define CXD2841ER_CHIP_ID 0xa7 +#define CXD2854ER_CHIP_ID 0xc1 #define CXD2841ER_DVBS_POLLING_INVL 10 diff --git a/drivers/media/dvb-frontends/dib0090.c b/drivers/media/dvb-frontends/dib0090.c index d879dc0607f4..14c403254fe0 100644 --- a/drivers/media/dvb-frontends/dib0090.c +++ b/drivers/media/dvb-frontends/dib0090.c @@ -797,6 +797,8 @@ static const u16 bb_ramp_pwm_normal[] = { (0 << 9) | 400, /* BB_RAMP6 */ }; +#if 0 +/* Currently unused */ static const u16 bb_ramp_pwm_boost[] = { 550, /* max BB gain in 10th of dB */ 8, /* ramp_slope = 1dB of gain -> clock_ticks_per_db = clk_khz / ramp_slope -> BB_RAMP2 */ @@ -806,6 +808,7 @@ static const u16 bb_ramp_pwm_boost[] = { (2 << 9) | 208, /* BB_RAMP5 = 29dB */ (0 << 9) | 440, /* BB_RAMP6 */ }; +#endif static const u16 rf_ramp_pwm_cband[] = { 314, /* max RF gain in 10th of dB */ @@ -849,6 +852,8 @@ static const u16 rf_ramp_pwm_uhf[] = { (0 << 10) | 580, /* GAIN_4_2, LNA 4 */ }; +#if 0 +/* Currently unused */ static const u16 rf_ramp_pwm_sband[] = { 253, /* max RF gain in 10th of dB */ 38, /* ramp_slope = 1dB of gain -> clock_ticks_per_db = clk_khz / ramp_slope -> RF_RAMP2 */ @@ -862,6 +867,7 @@ static const u16 rf_ramp_pwm_sband[] = { (0 << 10) | 0, /* GAIN_4_1, LNA 4 = 0dB */ (0 << 10) | 0, /* GAIN_4_2, LNA 4 */ }; +#endif struct slope { s16 range; diff --git a/drivers/media/dvb-frontends/drx39xyj/drxj.c b/drivers/media/dvb-frontends/drx39xyj/drxj.c index e48b741d439e..bd6d2ee0f7c9 100644 --- a/drivers/media/dvb-frontends/drx39xyj/drxj.c +++ b/drivers/media/dvb-frontends/drx39xyj/drxj.c @@ -1240,12 +1240,15 @@ static u32 frac_times1e6(u32 N, u32 D) * and rounded. For calc used formula: 16*10^(prescaleGain[dB]/20). 
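* e.g. 16*10^(-24/20) ~= 1 and 16*10^(18/20) ~= 127, so the 43 entries appear to cover prescale gains from -24dB to +18dB in 1dB steps.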
* */ +#if 0 +/* Currently unused, as we lack support for analog TV */ static const u16 nicam_presc_table_val[43] = { 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 23, 25, 28, 32, 36, 40, 45, 51, 57, 64, 71, 80, 90, 101, 113, 127 }; +#endif /*============================================================================*/ /*== END HELPER FUNCTIONS ==*/ diff --git a/drivers/media/dvb-frontends/ds3000.c b/drivers/media/dvb-frontends/ds3000.c index addffc33993a..447b518e287a 100644 --- a/drivers/media/dvb-frontends/ds3000.c +++ b/drivers/media/dvb-frontends/ds3000.c @@ -959,6 +959,15 @@ static int ds3000_set_frontend(struct dvb_frontend *fe) /* enable ac coupling */ ds3000_writereg(state, 0x25, 0x8a); + if ((c->symbol_rate < ds3000_ops.info.symbol_rate_min) || + (c->symbol_rate > ds3000_ops.info.symbol_rate_max)) { + dprintk("%s() symbol_rate %u out of range (%u ... %u)\n", + __func__, c->symbol_rate, + ds3000_ops.info.symbol_rate_min, + ds3000_ops.info.symbol_rate_max); + return -EINVAL; + } + /* enhance symbol rate performance */ if ((c->symbol_rate / 1000) <= 5000) { value = 29777 / (c->symbol_rate / 1000) + 1; diff --git a/drivers/media/dvb-frontends/helene.c b/drivers/media/dvb-frontends/helene.c new file mode 100644 index 000000000000..97a8982740a6 --- /dev/null +++ b/drivers/media/dvb-frontends/helene.c @@ -0,0 +1,1042 @@ +/* + * helene.c + * + * Sony HELENE DVB-S/S2 DVB-T/T2 DVB-C/C2 ISDB-T/S tuner driver (CXD2858ER) + * + * Copyright 2012 Sony Corporation + * Copyright (C) 2014 NetUP Inc. + * Copyright (C) 2014 Abylay Ospan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ */ + +#include +#include +#include +#include +#include "helene.h" +#include "dvb_frontend.h" + +#define MAX_WRITE_REGSIZE 20 + +enum helene_state { + STATE_UNKNOWN, + STATE_SLEEP, + STATE_ACTIVE +}; + +struct helene_priv { + u32 frequency; + u8 i2c_address; + struct i2c_adapter *i2c; + enum helene_state state; + void *set_tuner_data; + int (*set_tuner)(void *, int); + enum helene_xtal xtal; +}; + +#define TERR_INTERNAL_LOOPFILTER_AVAILABLE(tv_system) \ + (((tv_system) != SONY_HELENE_DTV_DVBC_6) && \ + ((tv_system) != SONY_HELENE_DTV_DVBC_8)\ + && ((tv_system) != SONY_HELENE_DTV_DVBC2_6) && \ + ((tv_system) != SONY_HELENE_DTV_DVBC2_8)) + +#define HELENE_AUTO 0xff +#define HELENE_OFFSET(ofs) ((u8)(ofs) & 0x1F) +#define HELENE_BW_6 0x00 +#define HELENE_BW_7 0x01 +#define HELENE_BW_8 0x02 +#define HELENE_BW_1_7 0x03 + +enum helene_tv_system_t { + SONY_HELENE_TV_SYSTEM_UNKNOWN, + /* Terrestrial Analog */ + SONY_HELENE_ATV_MN_EIAJ, + /**< System-M (Japan) (IF: Fp=5.75MHz in default) */ + SONY_HELENE_ATV_MN_SAP, + /**< System-M (US) (IF: Fp=5.75MHz in default) */ + SONY_HELENE_ATV_MN_A2, + /**< System-M (Korea) (IF: Fp=5.9MHz in default) */ + SONY_HELENE_ATV_BG, + /**< System-B/G (IF: Fp=7.3MHz in default) */ + SONY_HELENE_ATV_I, + /**< System-I (IF: Fp=7.85MHz in default) */ + SONY_HELENE_ATV_DK, + /**< System-D/K (IF: Fp=7.85MHz in default) */ + SONY_HELENE_ATV_L, + /**< System-L (IF: Fp=7.85MHz in default) */ + SONY_HELENE_ATV_L_DASH, + /**< System-L DASH (IF: Fp=2.2MHz in default) */ + /* Terrestrial/Cable Digital */ + SONY_HELENE_DTV_8VSB, + /**< ATSC 8VSB (IF: Fc=3.7MHz in default) */ + SONY_HELENE_DTV_QAM, + /**< US QAM (IF: Fc=3.7MHz in default) */ + SONY_HELENE_DTV_ISDBT_6, + /**< ISDB-T 6MHzBW (IF: Fc=3.55MHz in default) */ + SONY_HELENE_DTV_ISDBT_7, + /**< ISDB-T 7MHzBW (IF: Fc=4.15MHz in default) */ + SONY_HELENE_DTV_ISDBT_8, + /**< ISDB-T 8MHzBW (IF: Fc=4.75MHz in default) */ + SONY_HELENE_DTV_DVBT_5, + /**< DVB-T 5MHzBW (IF: Fc=3.6MHz in default) */ + SONY_HELENE_DTV_DVBT_6, + /**< DVB-T 6MHzBW (IF: Fc=3.6MHz in default) */ + SONY_HELENE_DTV_DVBT_7, + /**< DVB-T 7MHzBW (IF: Fc=4.2MHz in default) */ + SONY_HELENE_DTV_DVBT_8, + /**< DVB-T 8MHzBW (IF: Fc=4.8MHz in default) */ + SONY_HELENE_DTV_DVBT2_1_7, + /**< DVB-T2 1.7MHzBW (IF: Fc=3.5MHz in default) */ + SONY_HELENE_DTV_DVBT2_5, + /**< DVB-T2 5MHzBW (IF: Fc=3.6MHz in default) */ + SONY_HELENE_DTV_DVBT2_6, + /**< DVB-T2 6MHzBW (IF: Fc=3.6MHz in default) */ + SONY_HELENE_DTV_DVBT2_7, + /**< DVB-T2 7MHzBW (IF: Fc=4.2MHz in default) */ + SONY_HELENE_DTV_DVBT2_8, + /**< DVB-T2 8MHzBW (IF: Fc=4.8MHz in default) */ + SONY_HELENE_DTV_DVBC_6, + /**< DVB-C 6MHzBW (IF: Fc=3.7MHz in default) */ + SONY_HELENE_DTV_DVBC_8, + /**< DVB-C 8MHzBW (IF: Fc=4.9MHz in default) */ + SONY_HELENE_DTV_DVBC2_6, + /**< DVB-C2 6MHzBW (IF: Fc=3.7MHz in default) */ + SONY_HELENE_DTV_DVBC2_8, + /**< DVB-C2 8MHzBW (IF: Fc=4.9MHz in default) */ + SONY_HELENE_DTV_DTMB, + /**< DTMB (IF: Fc=5.1MHz in default) */ + /* Satellite */ + SONY_HELENE_STV_ISDBS, + /**< ISDB-S */ + SONY_HELENE_STV_DVBS, + /**< DVB-S */ + SONY_HELENE_STV_DVBS2, + /**< DVB-S2 */ + + SONY_HELENE_ATV_MIN = SONY_HELENE_ATV_MN_EIAJ, + /**< Minimum analog terrestrial system */ + SONY_HELENE_ATV_MAX = SONY_HELENE_ATV_L_DASH, + /**< Maximum analog terrestrial system */ + SONY_HELENE_DTV_MIN = SONY_HELENE_DTV_8VSB, + /**< Minimum digital terrestrial system */ + SONY_HELENE_DTV_MAX = SONY_HELENE_DTV_DTMB, + /**< Maximum digital terrestrial system */ + SONY_HELENE_TERR_TV_SYSTEM_NUM, + /**< Number of 
supported terrestrial broadcasting system */ + SONY_HELENE_STV_MIN = SONY_HELENE_STV_ISDBS, + /**< Minimum satellite system */ + SONY_HELENE_STV_MAX = SONY_HELENE_STV_DVBS2 + /**< Maximum satellite system */ +}; + +struct helene_terr_adjust_param_t { + /* < Addr:0x69 Bit[6:4] : RFVGA gain. + * 0xFF means Auto. (RF_GAIN_SEL = 1) + */ + uint8_t RF_GAIN; + /* < Addr:0x69 Bit[3:0] : IF_BPF gain. + */ + uint8_t IF_BPF_GC; + /* < Addr:0x6B Bit[3:0] : RF overload + * RF input detect level. (FRF <= 172MHz) + */ + uint8_t RFOVLD_DET_LV1_VL; + /* < Addr:0x6B Bit[3:0] : RF overload + * RF input detect level. (172MHz < FRF <= 464MHz) + */ + uint8_t RFOVLD_DET_LV1_VH; + /* < Addr:0x6B Bit[3:0] : RF overload + * RF input detect level. (FRF > 464MHz) + */ + uint8_t RFOVLD_DET_LV1_U; + /* < Addr:0x6C Bit[2:0] : + * Internal RFAGC detect level. (FRF <= 172MHz) + */ + uint8_t IFOVLD_DET_LV_VL; + /* < Addr:0x6C Bit[2:0] : + * Internal RFAGC detect level. (172MHz < FRF <= 464MHz) + */ + uint8_t IFOVLD_DET_LV_VH; + /* < Addr:0x6C Bit[2:0] : + * Internal RFAGC detect level. (FRF > 464MHz) + */ + uint8_t IFOVLD_DET_LV_U; + /* < Addr:0x6D Bit[5:4] : + * IF filter center offset. + */ + uint8_t IF_BPF_F0; + /* < Addr:0x6D Bit[1:0] : + * 6MHzBW(0x00) or 7MHzBW(0x01) + * or 8MHzBW(0x02) or 1.7MHzBW(0x03) + */ + uint8_t BW; + /* < Addr:0x6E Bit[4:0] : + * 5bit signed. IF offset (kHz) = FIF_OFFSET x 50 + */ + uint8_t FIF_OFFSET; + /* < Addr:0x6F Bit[4:0] : + * 5bit signed. BW offset (kHz) = + * BW_OFFSET x 50 (BW_OFFSET x 10 in 1.7MHzBW) + */ + uint8_t BW_OFFSET; + /* < Addr:0x9C Bit[0] : + * Local polarity. (0: Upper Local, 1: Lower Local) + */ + uint8_t IS_LOWERLOCAL; +}; + +static const struct helene_terr_adjust_param_t +terr_params[SONY_HELENE_TERR_TV_SYSTEM_NUM] = { + /*< SONY_HELENE_TV_SYSTEM_UNKNOWN */ + {HELENE_AUTO, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + HELENE_BW_6, HELENE_OFFSET(0), HELENE_OFFSET(0), 0x00}, + /* Analog */ + /**< SONY_HELENE_ATV_MN_EIAJ (System-M (Japan)) */ + {HELENE_AUTO, 0x05, 0x03, 0x06, 0x03, 0x01, 0x01, 0x01, 0x00, + HELENE_BW_6, HELENE_OFFSET(0), HELENE_OFFSET(1), 0x00}, + /**< SONY_HELENE_ATV_MN_SAP (System-M (US)) */ + {HELENE_AUTO, 0x05, 0x03, 0x06, 0x03, 0x01, 0x01, 0x01, 0x00, + HELENE_BW_6, HELENE_OFFSET(0), HELENE_OFFSET(1), 0x00}, + {HELENE_AUTO, 0x05, 0x03, 0x06, 0x03, 0x01, 0x01, 0x01, 0x00, + HELENE_BW_6, HELENE_OFFSET(3), HELENE_OFFSET(1), 0x00}, + /**< SONY_HELENE_ATV_MN_A2 (System-M (Korea)) */ + {HELENE_AUTO, 0x05, 0x03, 0x06, 0x03, 0x01, 0x01, 0x01, 0x00, + HELENE_BW_7, HELENE_OFFSET(11), HELENE_OFFSET(5), 0x00}, + /**< SONY_HELENE_ATV_BG (System-B/G) */ + {HELENE_AUTO, 0x05, 0x03, 0x06, 0x03, 0x01, 0x01, 0x01, 0x00, + HELENE_BW_8, HELENE_OFFSET(2), HELENE_OFFSET(-3), 0x00}, + /**< SONY_HELENE_ATV_I (System-I) */ + {HELENE_AUTO, 0x05, 0x03, 0x06, 0x03, 0x01, 0x01, 0x01, 0x00, + HELENE_BW_8, HELENE_OFFSET(2), HELENE_OFFSET(-3), 0x00}, + /**< SONY_HELENE_ATV_DK (System-D/K) */ + {HELENE_AUTO, 0x03, 0x04, 0x0A, 0x04, 0x04, 0x04, 0x04, 0x00, + HELENE_BW_8, HELENE_OFFSET(2), HELENE_OFFSET(-3), 0x00}, + /**< SONY_HELENE_ATV_L (System-L) */ + {HELENE_AUTO, 0x03, 0x04, 0x0A, 0x04, 0x04, 0x04, 0x04, 0x00, + HELENE_BW_8, HELENE_OFFSET(-1), HELENE_OFFSET(4), 0x00}, + /**< SONY_HELENE_ATV_L_DASH (System-L DASH) */ + /* Digital */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x03, 0x03, 0x03, 0x00, + HELENE_BW_6, HELENE_OFFSET(-6), HELENE_OFFSET(-3), 0x00}, + /**< SONY_HELENE_DTV_8VSB (ATSC 8VSB) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_6, 
HELENE_OFFSET(-6), HELENE_OFFSET(-3), 0x00}, + /**< SONY_HELENE_DTV_QAM (US QAM) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_6, HELENE_OFFSET(-9), HELENE_OFFSET(-5), 0x00}, + /**< SONY_HELENE_DTV_ISDBT_6 (ISDB-T 6MHzBW) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_7, HELENE_OFFSET(-7), HELENE_OFFSET(-6), 0x00}, + /**< SONY_HELENE_DTV_ISDBT_7 (ISDB-T 7MHzBW) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_8, HELENE_OFFSET(-5), HELENE_OFFSET(-7), 0x00}, + /**< SONY_HELENE_DTV_ISDBT_8 (ISDB-T 8MHzBW) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_6, HELENE_OFFSET(-8), HELENE_OFFSET(-3), 0x00}, + /**< SONY_HELENE_DTV_DVBT_5 (DVB-T 5MHzBW) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_6, HELENE_OFFSET(-8), HELENE_OFFSET(-3), 0x00}, + /**< SONY_HELENE_DTV_DVBT_6 (DVB-T 6MHzBW) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_7, HELENE_OFFSET(-6), HELENE_OFFSET(-5), 0x00}, + /**< SONY_HELENE_DTV_DVBT_7 (DVB-T 7MHzBW) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_8, HELENE_OFFSET(-4), HELENE_OFFSET(-6), 0x00}, + /**< SONY_HELENE_DTV_DVBT_8 (DVB-T 8MHzBW) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_1_7, HELENE_OFFSET(-10), HELENE_OFFSET(-10), 0x00}, + /**< SONY_HELENE_DTV_DVBT2_1_7 (DVB-T2 1.7MHzBW) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_6, HELENE_OFFSET(-8), HELENE_OFFSET(-3), 0x00}, + /**< SONY_HELENE_DTV_DVBT2_5 (DVB-T2 5MHzBW) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_6, HELENE_OFFSET(-8), HELENE_OFFSET(-3), 0x00}, + /**< SONY_HELENE_DTV_DVBT2_6 (DVB-T2 6MHzBW) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_7, HELENE_OFFSET(-6), HELENE_OFFSET(-5), 0x00}, + /**< SONY_HELENE_DTV_DVBT2_7 (DVB-T2 7MHzBW) */ + {HELENE_AUTO, 0x09, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_8, HELENE_OFFSET(-4), HELENE_OFFSET(-6), 0x00}, + /**< SONY_HELENE_DTV_DVBT2_8 (DVB-T2 8MHzBW) */ + {HELENE_AUTO, 0x05, 0x02, 0x02, 0x02, 0x01, 0x01, 0x01, 0x00, + HELENE_BW_6, HELENE_OFFSET(-6), HELENE_OFFSET(-4), 0x00}, + /**< SONY_HELENE_DTV_DVBC_6 (DVB-C 6MHzBW) */ + {HELENE_AUTO, 0x05, 0x02, 0x02, 0x02, 0x01, 0x01, 0x01, 0x00, + HELENE_BW_8, HELENE_OFFSET(-2), HELENE_OFFSET(-3), 0x00}, + /**< SONY_HELENE_DTV_DVBC_8 (DVB-C 8MHzBW) */ + {HELENE_AUTO, 0x03, 0x09, 0x09, 0x09, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_6, HELENE_OFFSET(-6), HELENE_OFFSET(-2), 0x00}, + /**< SONY_HELENE_DTV_DVBC2_6 (DVB-C2 6MHzBW) */ + {HELENE_AUTO, 0x03, 0x09, 0x09, 0x09, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_8, HELENE_OFFSET(-2), HELENE_OFFSET(0), 0x00}, + /**< SONY_HELENE_DTV_DVBC2_8 (DVB-C2 8MHzBW) */ + {HELENE_AUTO, 0x04, 0x0B, 0x0B, 0x0B, 0x02, 0x02, 0x02, 0x00, + HELENE_BW_8, HELENE_OFFSET(2), HELENE_OFFSET(1), 0x00} + /**< SONY_HELENE_DTV_DTMB (DTMB) */ +}; + +static void helene_i2c_debug(struct helene_priv *priv, + u8 reg, u8 write, const u8 *data, u32 len) +{ + dev_dbg(&priv->i2c->dev, "helene: I2C %s reg 0x%02x size %d\n", + (write == 0 ? 
"read" : "write"), reg, len); + print_hex_dump_bytes("helene: I2C data: ", + DUMP_PREFIX_OFFSET, data, len); +} + +static int helene_write_regs(struct helene_priv *priv, + u8 reg, const u8 *data, u32 len) +{ + int ret; + u8 buf[MAX_WRITE_REGSIZE + 1]; + struct i2c_msg msg[1] = { + { + .addr = priv->i2c_address, + .flags = 0, + .len = len + 1, + .buf = buf, + } + }; + + if (len + 1 > sizeof(buf)) { + dev_warn(&priv->i2c->dev, + "wr reg=%04x: len=%d vs %Zu is too big!\n", + reg, len + 1, sizeof(buf)); + return -E2BIG; + } + + helene_i2c_debug(priv, reg, 1, data, len); + buf[0] = reg; + memcpy(&buf[1], data, len); + ret = i2c_transfer(priv->i2c, msg, 1); + if (ret >= 0 && ret != 1) + ret = -EREMOTEIO; + if (ret < 0) { + dev_warn(&priv->i2c->dev, + "%s: i2c wr failed=%d reg=%02x len=%d\n", + KBUILD_MODNAME, ret, reg, len); + return ret; + } + return 0; +} + +static int helene_write_reg(struct helene_priv *priv, u8 reg, u8 val) +{ + return helene_write_regs(priv, reg, &val, 1); +} + +static int helene_read_regs(struct helene_priv *priv, + u8 reg, u8 *val, u32 len) +{ + int ret; + struct i2c_msg msg[2] = { + { + .addr = priv->i2c_address, + .flags = 0, + .len = 1, + .buf = ®, + }, { + .addr = priv->i2c_address, + .flags = I2C_M_RD, + .len = len, + .buf = val, + } + }; + + ret = i2c_transfer(priv->i2c, &msg[0], 1); + if (ret >= 0 && ret != 1) + ret = -EREMOTEIO; + if (ret < 0) { + dev_warn(&priv->i2c->dev, + "%s: I2C rw failed=%d addr=%02x reg=%02x\n", + KBUILD_MODNAME, ret, priv->i2c_address, reg); + return ret; + } + ret = i2c_transfer(priv->i2c, &msg[1], 1); + if (ret >= 0 && ret != 1) + ret = -EREMOTEIO; + if (ret < 0) { + dev_warn(&priv->i2c->dev, + "%s: i2c rd failed=%d addr=%02x reg=%02x\n", + KBUILD_MODNAME, ret, priv->i2c_address, reg); + return ret; + } + helene_i2c_debug(priv, reg, 0, val, len); + return 0; +} + +static int helene_read_reg(struct helene_priv *priv, u8 reg, u8 *val) +{ + return helene_read_regs(priv, reg, val, 1); +} + +static int helene_set_reg_bits(struct helene_priv *priv, + u8 reg, u8 data, u8 mask) +{ + int res; + u8 rdata; + + if (mask != 0xff) { + res = helene_read_reg(priv, reg, &rdata); + if (res != 0) + return res; + data = ((data & mask) | (rdata & (mask ^ 0xFF))); + } + return helene_write_reg(priv, reg, data); +} + +static int helene_enter_power_save(struct helene_priv *priv) +{ + dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + if (priv->state == STATE_SLEEP) + return 0; + + /* Standby setting for CPU */ + helene_write_reg(priv, 0x88, 0x0); + + /* Standby setting for internal logic block */ + helene_write_reg(priv, 0x87, 0xC0); + + priv->state = STATE_SLEEP; + return 0; +} + +static int helene_leave_power_save(struct helene_priv *priv) +{ + dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + if (priv->state == STATE_ACTIVE) + return 0; + + /* Standby setting for internal logic block */ + helene_write_reg(priv, 0x87, 0xC4); + + /* Standby setting for CPU */ + helene_write_reg(priv, 0x88, 0x40); + + priv->state = STATE_ACTIVE; + return 0; +} + +static int helene_init(struct dvb_frontend *fe) +{ + struct helene_priv *priv = fe->tuner_priv; + + dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + return helene_leave_power_save(priv); +} + +static int helene_release(struct dvb_frontend *fe) +{ + struct helene_priv *priv = fe->tuner_priv; + + dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + kfree(fe->tuner_priv); + fe->tuner_priv = NULL; + return 0; +} + +static int helene_sleep(struct dvb_frontend *fe) +{ + struct helene_priv *priv = fe->tuner_priv; + + 
dev_dbg(&priv->i2c->dev, "%s()\n", __func__); + helene_enter_power_save(priv); + return 0; +} + +static enum helene_tv_system_t helene_get_tv_system(struct dvb_frontend *fe) +{ + enum helene_tv_system_t system = SONY_HELENE_TV_SYSTEM_UNKNOWN; + struct dtv_frontend_properties *p = &fe->dtv_property_cache; + struct helene_priv *priv = fe->tuner_priv; + + if (p->delivery_system == SYS_DVBT) { + if (p->bandwidth_hz <= 5000000) + system = SONY_HELENE_DTV_DVBT_5; + else if (p->bandwidth_hz <= 6000000) + system = SONY_HELENE_DTV_DVBT_6; + else if (p->bandwidth_hz <= 7000000) + system = SONY_HELENE_DTV_DVBT_7; + else if (p->bandwidth_hz <= 8000000) + system = SONY_HELENE_DTV_DVBT_8; + else { + system = SONY_HELENE_DTV_DVBT_8; + p->bandwidth_hz = 8000000; + } + } else if (p->delivery_system == SYS_DVBT2) { + if (p->bandwidth_hz <= 5000000) + system = SONY_HELENE_DTV_DVBT2_5; + else if (p->bandwidth_hz <= 6000000) + system = SONY_HELENE_DTV_DVBT2_6; + else if (p->bandwidth_hz <= 7000000) + system = SONY_HELENE_DTV_DVBT2_7; + else if (p->bandwidth_hz <= 8000000) + system = SONY_HELENE_DTV_DVBT2_8; + else { + system = SONY_HELENE_DTV_DVBT2_8; + p->bandwidth_hz = 8000000; + } + } else if (p->delivery_system == SYS_DVBS) { + system = SONY_HELENE_STV_DVBS; + } else if (p->delivery_system == SYS_DVBS2) { + system = SONY_HELENE_STV_DVBS2; + } else if (p->delivery_system == SYS_ISDBS) { + system = SONY_HELENE_STV_ISDBS; + } else if (p->delivery_system == SYS_ISDBT) { + if (p->bandwidth_hz <= 6000000) + system = SONY_HELENE_DTV_ISDBT_6; + else if (p->bandwidth_hz <= 7000000) + system = SONY_HELENE_DTV_ISDBT_7; + else if (p->bandwidth_hz <= 8000000) + system = SONY_HELENE_DTV_ISDBT_8; + else { + system = SONY_HELENE_DTV_ISDBT_8; + p->bandwidth_hz = 8000000; + } + } else if (p->delivery_system == SYS_DVBC_ANNEX_A) { + if (p->bandwidth_hz <= 6000000) + system = SONY_HELENE_DTV_DVBC_6; + else if (p->bandwidth_hz <= 8000000) + system = SONY_HELENE_DTV_DVBC_8; + } + dev_dbg(&priv->i2c->dev, + "%s(): HELENE DTV system %d (delsys %d, bandwidth %d)\n", + __func__, (int)system, p->delivery_system, + p->bandwidth_hz); + return system; +} + +static int helene_set_params_s(struct dvb_frontend *fe) +{ + u8 data[MAX_WRITE_REGSIZE]; + u32 frequency; + enum helene_tv_system_t tv_system; + struct dtv_frontend_properties *p = &fe->dtv_property_cache; + struct helene_priv *priv = fe->tuner_priv; + int frequencykHz = p->frequency; + uint32_t frequency4kHz = 0; + u32 symbol_rate = p->symbol_rate/1000; + + dev_dbg(&priv->i2c->dev, "%s(): tune frequency %dkHz sr=%uKsps\n", + __func__, frequencykHz, symbol_rate); + tv_system = helene_get_tv_system(fe); + + if (tv_system == SONY_HELENE_TV_SYSTEM_UNKNOWN) { + dev_err(&priv->i2c->dev, "%s(): unknown DTV system\n", + __func__); + return -EINVAL; + } + /* RF switch turn to satellite */ + if (priv->set_tuner) + priv->set_tuner(priv->set_tuner_data, 0); + frequency = roundup(p->frequency / 1000, 1); + + /* Disable IF signal output */ + helene_write_reg(priv, 0x15, 0x02); + + /* RFIN matching in power save (Sat) reset */ + helene_write_reg(priv, 0x43, 0x06); + + /* Analog block setting (0x6A, 0x6B) */ + data[0] = 0x00; + data[1] = 0x00; + helene_write_regs(priv, 0x6A, data, 2); + helene_write_reg(priv, 0x75, 0x99); + helene_write_reg(priv, 0x9D, 0x00); + + /* Tuning setting for CPU (0x61) */ + helene_write_reg(priv, 0x61, 0x07); + + /* Satellite mode select (0x01) */ + helene_write_reg(priv, 0x01, 0x01); + + /* Clock enable for internal logic block, CPU wake-up (0x04, 0x05) */ + data[0] = 
0xC4; + data[1] = 0x40; + + switch (priv->xtal) { + case SONY_HELENE_XTAL_16000: + data[2] = 0x02; + break; + case SONY_HELENE_XTAL_20500: + data[2] = 0x02; + break; + case SONY_HELENE_XTAL_24000: + data[2] = 0x03; + break; + case SONY_HELENE_XTAL_41000: + data[2] = 0x05; + break; + default: + dev_err(&priv->i2c->dev, "%s(): unknown xtal %d\n", + __func__, priv->xtal); + return -EINVAL; + } + + /* Setting for analog block (0x07). LOOPFILTER INTERNAL */ + data[3] = 0x80; + + /* Tuning setting for analog block + * (0x08, 0x09, 0x0A, 0x0B). LOOPFILTER INTERNAL + */ + if (priv->xtal == SONY_HELENE_XTAL_20500) + data[4] = 0x58; + else + data[4] = 0x70; + + data[5] = 0x1E; + data[6] = 0x02; + data[7] = 0x24; + + /* Enable for analog block (0x0C, 0x0D, 0x0E). SAT LNA ON */ + data[8] = 0x0F; + data[8] |= 0xE0; /* POWERSAVE_TERR_RF_ACTIVE */ + data[9] = 0x02; + data[10] = 0x1E; + + /* Setting for LPF cutoff frequency (0x0F) */ + switch (tv_system) { + case SONY_HELENE_STV_ISDBS: + data[11] = 0x22; /* 22MHz */ + break; + case SONY_HELENE_STV_DVBS: + if (symbol_rate <= 4000) + data[11] = 0x05; + else if (symbol_rate <= 10000) + data[11] = (uint8_t)((symbol_rate * 47 + + (40000-1)) / 40000); + else + data[11] = (uint8_t)((symbol_rate * 27 + + (40000-1)) / 40000 + 5); + + if (data[11] > 36) + data[11] = 36; /* 5 <= lpf_cutoff <= 36 is valid */ + break; + case SONY_HELENE_STV_DVBS2: + if (symbol_rate <= 4000) + data[11] = 0x05; + else if (symbol_rate <= 10000) + data[11] = (uint8_t)((symbol_rate * 11 + + (10000-1)) / 10000); + else + data[11] = (uint8_t)((symbol_rate * 3 + + (5000-1)) / 5000 + 5); + + if (data[11] > 36) + data[11] = 36; /* 5 <= lpf_cutoff <= 36 is valid */ + break; + default: + dev_err(&priv->i2c->dev, "%s(): unknown standard %d\n", + __func__, tv_system); + return -EINVAL; + } + + /* RF tuning frequency setting (0x10, 0x11, 0x12) */ + frequency4kHz = (frequencykHz + 2) / 4; + data[12] = (uint8_t)(frequency4kHz & 0xFF); /* FRF_L */ + data[13] = (uint8_t)((frequency4kHz >> 8) & 0xFF); /* FRF_M */ + /* FRF_H (bit[3:0]) */ + data[14] = (uint8_t)((frequency4kHz >> 16) & 0x0F); + + /* Tuning command (0x13) */ + data[15] = 0xFF; + + /* Setting for IQOUT_LIMIT (0x14) 0.75Vpp */ + data[16] = 0x00; + + /* Enable IQ output (0x15) */ + data[17] = 0x01; + + helene_write_regs(priv, 0x04, data, 18); + + dev_dbg(&priv->i2c->dev, "%s(): tune done\n", + __func__); + + priv->frequency = frequency; + return 0; +} + +static int helene_set_params(struct dvb_frontend *fe) +{ + u8 data[MAX_WRITE_REGSIZE]; + u32 frequency; + enum helene_tv_system_t tv_system; + struct dtv_frontend_properties *p = &fe->dtv_property_cache; + struct helene_priv *priv = fe->tuner_priv; + int frequencykHz = p->frequency / 1000; + + dev_dbg(&priv->i2c->dev, "%s(): tune frequency %dkHz\n", + __func__, frequencykHz); + tv_system = helene_get_tv_system(fe); + + if (tv_system == SONY_HELENE_TV_SYSTEM_UNKNOWN) { + dev_dbg(&priv->i2c->dev, "%s(): unknown DTV system\n", + __func__); + return -EINVAL; + } + if (priv->set_tuner) + priv->set_tuner(priv->set_tuner_data, 1); + frequency = roundup(p->frequency / 1000, 25); + + /* mode select */ + helene_write_reg(priv, 0x01, 0x00); + + /* Disable IF signal output */ + helene_write_reg(priv, 0x74, 0x02); + + if (priv->state == STATE_SLEEP) + helene_leave_power_save(priv); + + /* Initial setting for internal analog block (0x91, 0x92) */ + if ((tv_system == SONY_HELENE_DTV_DVBC_6) || + (tv_system == SONY_HELENE_DTV_DVBC_8)) { + data[0] = 0x16; + data[1] = 0x26; + } else { + data[0] = 0x10; + 
data[1] = 0x20; + } + helene_write_regs(priv, 0x91, data, 2); + + /* Setting for analog block */ + if (TERR_INTERNAL_LOOPFILTER_AVAILABLE(tv_system)) + data[0] = 0x90; + else + data[0] = 0x00; + + /* Setting for local polarity (0x9D) */ + data[1] = (uint8_t)(terr_params[tv_system].IS_LOWERLOCAL & 0x01); + helene_write_regs(priv, 0x9C, data, 2); + + /* Enable for analog block */ + data[0] = 0xEE; + data[1] = 0x02; + data[2] = 0x1E; + data[3] = 0x67; /* Tuning setting for CPU */ + + /* Setting for PLL reference divider for xtal=24MHz */ + if ((tv_system == SONY_HELENE_DTV_DVBC_6) || + (tv_system == SONY_HELENE_DTV_DVBC_8)) + data[4] = 0x18; + else + data[4] = 0x03; + + /* Tuning setting for analog block */ + if (TERR_INTERNAL_LOOPFILTER_AVAILABLE(tv_system)) { + data[5] = 0x38; + data[6] = 0x1E; + data[7] = 0x02; + data[8] = 0x24; + } else if ((tv_system == SONY_HELENE_DTV_DVBC_6) || + (tv_system == SONY_HELENE_DTV_DVBC_8)) { + data[5] = 0x1C; + data[6] = 0x78; + data[7] = 0x08; + data[8] = 0x1C; + } else { + data[5] = 0xB4; + data[6] = 0x78; + data[7] = 0x08; + data[8] = 0x30; + } + helene_write_regs(priv, 0x5E, data, 9); + + /* LT_AMP_EN should be 0 */ + helene_set_reg_bits(priv, 0x67, 0x0, 0x02); + + /* Setting for IFOUT_LIMIT */ + data[0] = 0x00; /* 1.5Vpp */ + + /* RF_GAIN setting */ + if (terr_params[tv_system].RF_GAIN == HELENE_AUTO) + data[1] = 0x80; /* RF_GAIN_SEL = 1 */ + else + data[1] = (uint8_t)((terr_params[tv_system].RF_GAIN + << 4) & 0x70); + + /* IF_BPF_GC setting */ + data[1] |= (uint8_t)(terr_params[tv_system].IF_BPF_GC & 0x0F); + + /* Setting for internal RFAGC (0x6A, 0x6B, 0x6C) */ + data[2] = 0x00; + if (frequencykHz <= 172000) { + data[3] = (uint8_t)(terr_params[tv_system].RFOVLD_DET_LV1_VL + & 0x0F); + data[4] = (uint8_t)(terr_params[tv_system].IFOVLD_DET_LV_VL + & 0x07); + } else if (frequencykHz <= 464000) { + data[3] = (uint8_t)(terr_params[tv_system].RFOVLD_DET_LV1_VH + & 0x0F); + data[4] = (uint8_t)(terr_params[tv_system].IFOVLD_DET_LV_VH + & 0x07); + } else { + data[3] = (uint8_t)(terr_params[tv_system].RFOVLD_DET_LV1_U + & 0x0F); + data[4] = (uint8_t)(terr_params[tv_system].IFOVLD_DET_LV_U + & 0x07); + } + data[4] |= 0x20; + + /* Setting for IF frequency and bandwidth */ + + /* IF filter center frequency offset (IF_BPF_F0) (0x6D) */ + data[5] = (uint8_t)((terr_params[tv_system].IF_BPF_F0 << 4) & 0x30); + + /* IF filter band width (BW) (0x6D) */ + data[5] |= (uint8_t)(terr_params[tv_system].BW & 0x03); + + /* IF frequency offset value (FIF_OFFSET) (0x6E) */ + data[6] = (uint8_t)(terr_params[tv_system].FIF_OFFSET & 0x1F); + + /* IF band width offset value (BW_OFFSET) (0x6F) */ + data[7] = (uint8_t)(terr_params[tv_system].BW_OFFSET & 0x1F); + + /* RF tuning frequency setting (0x70, 0x71, 0x72) */ + data[8] = (uint8_t)(frequencykHz & 0xFF); /* FRF_L */ + data[9] = (uint8_t)((frequencykHz >> 8) & 0xFF); /* FRF_M */ + data[10] = (uint8_t)((frequencykHz >> 16) + & 0x0F); /* FRF_H (bit[3:0]) */ + + /* Tuning command */ + data[11] = 0xFF; + + /* Enable IF output, AGC and IFOUT pin selection (0x74) */ + data[12] = 0x01; + + if ((tv_system == SONY_HELENE_DTV_DVBC_6) || + (tv_system == SONY_HELENE_DTV_DVBC_8)) { + data[13] = 0xD9; + data[14] = 0x0F; + data[15] = 0x24; + data[16] = 0x87; + } else { + data[13] = 0x99; + data[14] = 0x00; + data[15] = 0x24; + data[16] = 0x87; + } + + helene_write_regs(priv, 0x68, data, 17); + + dev_dbg(&priv->i2c->dev, "%s(): tune done\n", + __func__); + + priv->frequency = frequency; + return 0; +} + +static int helene_get_frequency(struct 
dvb_frontend *fe, u32 *frequency) +{ + struct helene_priv *priv = fe->tuner_priv; + + *frequency = priv->frequency * 1000; + return 0; +} + +static struct dvb_tuner_ops helene_tuner_ops = { + .info = { + .name = "Sony HELENE Ter tuner", + .frequency_min = 1000000, + .frequency_max = 1200000000, + .frequency_step = 25000, + }, + .init = helene_init, + .release = helene_release, + .sleep = helene_sleep, + .set_params = helene_set_params, + .get_frequency = helene_get_frequency, +}; + +static struct dvb_tuner_ops helene_tuner_ops_s = { + .info = { + .name = "Sony HELENE Sat tuner", + .frequency_min = 500000, + .frequency_max = 2500000, + .frequency_step = 1000, + }, + .init = helene_init, + .release = helene_release, + .sleep = helene_sleep, + .set_params = helene_set_params_s, + .get_frequency = helene_get_frequency, +}; + +/* power-on tuner + * call once after reset + */ +static int helene_x_pon(struct helene_priv *priv) +{ + /* RFIN matching in power save (terrestrial) = ACTIVE */ + /* RFIN matching in power save (satellite) = ACTIVE */ + u8 dataT[] = { 0x06, 0x00, 0x02, 0x00 }; + /* SAT_RF_ACTIVE = true, lnaOff = false, terrRfActive = true */ + u8 dataS[] = { 0x05, 0x06 }; + u8 cdata[] = {0x7A, 0x01}; + u8 data[20]; + u8 rdata[2]; + + /* mode select */ + helene_write_reg(priv, 0x01, 0x00); + + helene_write_reg(priv, 0x67, dataT[3]); + helene_write_reg(priv, 0x43, dataS[1]); + helene_write_regs(priv, 0x5E, dataT, 3); + helene_write_reg(priv, 0x0C, dataS[0]); + + /* Initial setting for internal logic block */ + helene_write_regs(priv, 0x99, cdata, sizeof(cdata)); + + /* 0x81 - 0x94 */ + data[0] = 0x18; /* xtal 24 MHz */ + data[1] = (uint8_t)(0x80 | (0x04 & 0x1F)); /* 4 x 25 = 100uA */ + data[2] = (uint8_t)(0x80 | (0x26 & 0x7F)); /* 38 x 0.25 = 9.5pF */ + data[3] = 0x80; /* REFOUT signal output 500mVpp */ + data[4] = 0x00; /* GPIO settings */ + data[5] = 0x00; /* GPIO settings */ + data[6] = 0xC4; /* Clock enable for internal logic block */ + data[7] = 0x40; /* Start CPU boot-up */ + data[8] = 0x10; /* For burst-write */ + + /* Setting for internal RFAGC */ + data[9] = 0x00; + data[10] = 0x45; + data[11] = 0x75; + + data[12] = 0x07; /* Setting for analog block */ + + /* Initial setting for internal analog block */ + data[13] = 0x1C; + data[14] = 0x3F; + data[15] = 0x02; + data[16] = 0x10; + data[17] = 0x20; + data[18] = 0x0A; + data[19] = 0x00; + + helene_write_regs(priv, 0x81, data, sizeof(data)); + + /* Setting for internal RFAGC */ + helene_write_reg(priv, 0x9B, 0x00); + + msleep(20); + + /* Check CPU_STT/CPU_ERR */ + helene_read_regs(priv, 0x1A, rdata, sizeof(rdata)); + + if (rdata[0] != 0x00) { + dev_err(&priv->i2c->dev, + "HELENE tuner CPU error 0x%x\n", rdata[0]); + return -EIO; + } + + /* VCO current setting */ + cdata[0] = 0x90; + cdata[1] = 0x06; + helene_write_regs(priv, 0x17, cdata, sizeof(cdata)); + msleep(20); + helene_read_reg(priv, 0x19, data); + helene_write_reg(priv, 0x95, (uint8_t)((data[0] >> 4) & 0x0F)); + + /* Disable IF signal output */ + helene_write_reg(priv, 0x74, 0x02); + + /* Standby setting for CPU */ + helene_write_reg(priv, 0x88, 0x00); + + /* Standby setting for internal logic block */ + helene_write_reg(priv, 0x87, 0xC0); + + /* Load capacitance control setting for crystal oscillator */ + helene_write_reg(priv, 0x80, 0x01); + + /* Satellite initial setting */ + cdata[0] = 0x07; + cdata[1] = 0x00; + helene_write_regs(priv, 0x41, cdata, sizeof(cdata)); + + dev_info(&priv->i2c->dev, + "HELENE tuner x_pon done\n"); + + return 0; +} + +struct dvb_frontend 
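/*
 * Aside (illustration only): helene_x_pon() above follows a common
 * firmware-boot pattern — kick the on-chip CPU, wait a fixed settle
 * time, then verify a status register before touching anything else.
 * The shape of that check in isolation (register names are the
 * driver's own, error handling trimmed):
 */
static int example_wait_cpu_boot(struct helene_priv *priv)
{
	u8 rdata[2];

	msleep(20); /* boot settle time used by the driver */
	helene_read_regs(priv, 0x1A, rdata, sizeof(rdata)); /* CPU_STT/CPU_ERR */
	if (rdata[0] != 0x00)
		return -EIO; /* CPU did not come up cleanly */
	return 0;
}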
*helene_attach_s(struct dvb_frontend *fe, + const struct helene_config *config, + struct i2c_adapter *i2c) +{ + struct helene_priv *priv = NULL; + + priv = kzalloc(sizeof(struct helene_priv), GFP_KERNEL); + if (priv == NULL) + return NULL; + priv->i2c_address = (config->i2c_address >> 1); + priv->i2c = i2c; + priv->set_tuner_data = config->set_tuner_priv; + priv->set_tuner = config->set_tuner_callback; + priv->xtal = config->xtal; + + if (fe->ops.i2c_gate_ctrl) + fe->ops.i2c_gate_ctrl(fe, 1); + + if (helene_x_pon(priv) != 0) + return NULL; + + if (fe->ops.i2c_gate_ctrl) + fe->ops.i2c_gate_ctrl(fe, 0); + + memcpy(&fe->ops.tuner_ops, &helene_tuner_ops_s, + sizeof(struct dvb_tuner_ops)); + fe->tuner_priv = priv; + dev_info(&priv->i2c->dev, + "Sony HELENE Sat attached on addr=%x at I2C adapter %p\n", + priv->i2c_address, priv->i2c); + return fe; +} +EXPORT_SYMBOL(helene_attach_s); + +struct dvb_frontend *helene_attach(struct dvb_frontend *fe, + const struct helene_config *config, + struct i2c_adapter *i2c) +{ + struct helene_priv *priv = NULL; + + priv = kzalloc(sizeof(struct helene_priv), GFP_KERNEL); + if (priv == NULL) + return NULL; + priv->i2c_address = (config->i2c_address >> 1); + priv->i2c = i2c; + priv->set_tuner_data = config->set_tuner_priv; + priv->set_tuner = config->set_tuner_callback; + priv->xtal = config->xtal; + + if (fe->ops.i2c_gate_ctrl) + fe->ops.i2c_gate_ctrl(fe, 1); + + if (helene_x_pon(priv) != 0) + return NULL; + + if (fe->ops.i2c_gate_ctrl) + fe->ops.i2c_gate_ctrl(fe, 0); + + memcpy(&fe->ops.tuner_ops, &helene_tuner_ops, + sizeof(struct dvb_tuner_ops)); + fe->tuner_priv = priv; + dev_info(&priv->i2c->dev, + "Sony HELENE Ter attached on addr=%x at I2C adapter %p\n", + priv->i2c_address, priv->i2c); + return fe; +} +EXPORT_SYMBOL(helene_attach); + +MODULE_DESCRIPTION("Sony HELENE Sat/Ter tuner driver"); +MODULE_AUTHOR("Abylay Ospan "); +MODULE_LICENSE("GPL"); diff --git a/drivers/media/dvb-frontends/helene.h b/drivers/media/dvb-frontends/helene.h new file mode 100644 index 000000000000..e1b9224cfc55 --- /dev/null +++ b/drivers/media/dvb-frontends/helene.h @@ -0,0 +1,79 @@ +/* + * helene.h + * + * Sony HELENE DVB-S/S2/T/T2/C/C2/ISDB-T/S tuner driver (CXD2858ER) + * + * Copyright 2012 Sony Corporation + * Copyright (C) 2014 NetUP Inc. + * Copyright (C) 2014 Abylay Ospan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */
+
+#ifndef __DVB_HELENE_H__
+#define __DVB_HELENE_H__
+
+#include <linux/kconfig.h>
+#include <linux/dvb/frontend.h>
+#include <linux/i2c.h>
+
+enum helene_xtal {
+	SONY_HELENE_XTAL_16000, /* 16 MHz */
+	SONY_HELENE_XTAL_20500, /* 20.5 MHz */
+	SONY_HELENE_XTAL_24000, /* 24 MHz */
+	SONY_HELENE_XTAL_41000 /* 41 MHz */
+};
+
+/**
+ * struct helene_config - the configuration of 'Helene' tuner driver
+ * @i2c_address:	I2C address of the tuner
+ * @xtal_freq_mhz:	Oscillator frequency, MHz
+ * @set_tuner_priv:	Callback function private context
+ * @set_tuner_callback:	Callback function that notifies the parent driver
+ *			which tuner is active now
+ */
+struct helene_config {
+	u8	i2c_address;
+	u8	xtal_freq_mhz;
+	void	*set_tuner_priv;
+	int	(*set_tuner_callback)(void *, int);
+	enum helene_xtal xtal;
+};
+
+#if IS_REACHABLE(CONFIG_DVB_HELENE)
+extern struct dvb_frontend *helene_attach(struct dvb_frontend *fe,
+					const struct helene_config *config,
+					struct i2c_adapter *i2c);
+#else
+static inline struct dvb_frontend *helene_attach(struct dvb_frontend *fe,
+					const struct helene_config *config,
+					struct i2c_adapter *i2c)
+{
+	pr_warn("%s: driver disabled by Kconfig\n", __func__);
+	return NULL;
+}
+#endif
+
+#if IS_REACHABLE(CONFIG_DVB_HELENE)
+extern struct dvb_frontend *helene_attach_s(struct dvb_frontend *fe,
+					const struct helene_config *config,
+					struct i2c_adapter *i2c);
+#else
+static inline struct dvb_frontend *helene_attach_s(struct dvb_frontend *fe,
+					const struct helene_config *config,
+					struct i2c_adapter *i2c)
+{
+	pr_warn("%s: driver disabled by Kconfig\n", __func__);
+	return NULL;
+}
+#endif
+
+#endif
diff --git a/drivers/media/dvb-frontends/horus3a.c b/drivers/media/dvb-frontends/horus3a.c
index 000606af70f7..a98bca5270d9 100644
--- a/drivers/media/dvb-frontends/horus3a.c
+++ b/drivers/media/dvb-frontends/horus3a.c
@@ -66,7 +66,7 @@ static int horus3a_write_regs(struct horus3a_priv *priv,
 		}
 	};
 
-	if (len + 1 >= sizeof(buf)) {
+	if (len + 1 > sizeof(buf)) {
 		dev_warn(&priv->i2c->dev,"wr reg=%04x: len=%d is too big!\n",
 			 reg, len + 1);
 		return -E2BIG;
@@ -272,24 +272,6 @@ static int horus3a_set_params(struct dvb_frontend *fe)
 		if (fc_lpf > 36)
 			fc_lpf = 36;
 	} else if (p->delivery_system == SYS_DVBS2) {
-		int rolloff;
-
-		switch (p->rolloff) {
-		case ROLLOFF_35:
-			rolloff = 35;
-			break;
-		case ROLLOFF_25:
-			rolloff = 25;
-			break;
-		case ROLLOFF_20:
-			rolloff = 20;
-			break;
-		case ROLLOFF_AUTO:
-		default:
-			dev_err(&priv->i2c->dev,
-				"horus3a: auto roll-off is not supported\n");
-			return -EINVAL;
-		}
 		/*
 		 * SR <= 4.5:
 		 * fc_lpf = 5
@@ -302,11 +284,9 @@ static int horus3a_set_params(struct dvb_frontend *fe)
 		if (symbol_rate <= 4500)
 			fc_lpf = 5;
 		else if (symbol_rate <= 10000)
-			fc_lpf = (u8)DIV_ROUND_UP(
-				symbol_rate * (200 + rolloff), 200000);
+			fc_lpf = (u8)((symbol_rate * 11 + (10000-1)) / 10000);
 		else
-			fc_lpf = (u8)DIV_ROUND_UP(
-				symbol_rate * (100 + rolloff), 200000) + 5;
+			fc_lpf = (u8)((symbol_rate * 3 + (5000-1)) / 5000 + 5);
 		/* 5 <= fc_lpf <= 36 is valid */
 		if (fc_lpf > 36)
 			fc_lpf = 36;
diff --git a/drivers/media/dvb-frontends/m88ds3103.c b/drivers/media/dvb-frontends/m88ds3103.c
index 5557ef8fc704..e0fe5bc9dbce 100644
--- a/drivers/media/dvb-frontends/m88ds3103.c
+++ b/drivers/media/dvb-frontends/m88ds3103.c
@@ -306,8 +306,8 @@ static int m88ds3103_set_frontend(struct dvb_frontend *fe)
 	const struct m88ds3103_reg_val *init;
 	u8 u8tmp, u8tmp1 = 0, u8tmp2 = 0; /* silence compiler warning */
 	u8 buf[3];
-	u16 u16tmp, divide_ratio = 0;
-	u32 tuner_frequency, target_mclk;
+	u16 u16tmp;
+	u32 tuner_frequency_khz, target_mclk;
 	s32 s32tmp;
dev_dbg(&client->dev, @@ -344,7 +344,7 @@ static int m88ds3103_set_frontend(struct dvb_frontend *fe) } if (fe->ops.tuner_ops.get_frequency) { - ret = fe->ops.tuner_ops.get_frequency(fe, &tuner_frequency); + ret = fe->ops.tuner_ops.get_frequency(fe, &tuner_frequency_khz); if (ret) goto err; } else { @@ -353,20 +353,20 @@ static int m88ds3103_set_frontend(struct dvb_frontend *fe) * actual frequency used. Carrier offset calculation is not * valid. */ - tuner_frequency = c->frequency; + tuner_frequency_khz = c->frequency; } /* select M88RS6000 demod main mclk and ts mclk from tuner die. */ if (dev->chip_id == M88RS6000_CHIP_ID) { if (c->symbol_rate > 45010000) - dev->mclk_khz = 110250; + dev->mclk = 110250000; else - dev->mclk_khz = 96000; + dev->mclk = 96000000; if (c->delivery_system == SYS_DVBS) - target_mclk = 96000; + target_mclk = 96000000; else - target_mclk = 144000; + target_mclk = 144000000; /* Enable demod clock path */ ret = regmap_write(dev->regmap, 0x06, 0x00); @@ -375,7 +375,7 @@ static int m88ds3103_set_frontend(struct dvb_frontend *fe) usleep_range(10000, 20000); } else { /* set M88DS3103 mclk and ts mclk. */ - dev->mclk_khz = 96000; + dev->mclk = 96000000; switch (dev->cfg->ts_mode) { case M88DS3103_TS_SERIAL: @@ -385,14 +385,14 @@ static int m88ds3103_set_frontend(struct dvb_frontend *fe) case M88DS3103_TS_PARALLEL: case M88DS3103_TS_CI: if (c->delivery_system == SYS_DVBS) - target_mclk = 96000; + target_mclk = 96000000; else { if (c->symbol_rate < 18000000) - target_mclk = 96000; + target_mclk = 96000000; else if (c->symbol_rate < 28000000) - target_mclk = 144000; + target_mclk = 144000000; else - target_mclk = 192000; + target_mclk = 192000000; } break; default: @@ -402,15 +402,15 @@ static int m88ds3103_set_frontend(struct dvb_frontend *fe) } switch (target_mclk) { - case 96000: + case 96000000: u8tmp1 = 0x02; /* 0b10 */ u8tmp2 = 0x01; /* 0b01 */ break; - case 144000: + case 144000000: u8tmp1 = 0x00; /* 0b00 */ u8tmp2 = 0x01; /* 0b01 */ break; - case 192000: + case 192000000: u8tmp1 = 0x03; /* 0b11 */ u8tmp2 = 0x00; /* 0b00 */ break; @@ -464,8 +464,8 @@ static int m88ds3103_set_frontend(struct dvb_frontend *fe) } if (dev->chip_id == M88RS6000_CHIP_ID) { - if ((c->delivery_system == SYS_DVBS2) - && ((c->symbol_rate / 1000) <= 5000)) { + if (c->delivery_system == SYS_DVBS2 && + c->symbol_rate <= 5000000) { ret = regmap_write(dev->regmap, 0xc0, 0x04); if (ret) goto err; @@ -522,37 +522,25 @@ static int m88ds3103_set_frontend(struct dvb_frontend *fe) ret = m88ds3103_update_bits(dev, 0x29, 0x20, u8tmp1); if (ret) goto err; - u8tmp1 = 0; - u8tmp2 = 0; + u16tmp = 0; + u8tmp1 = 0x3f; + u8tmp2 = 0x3f; break; default: - if (dev->cfg->ts_clk) { - divide_ratio = DIV_ROUND_UP(target_mclk, dev->cfg->ts_clk); - u8tmp1 = divide_ratio / 2; - u8tmp2 = DIV_ROUND_UP(divide_ratio, 2); - } + u16tmp = DIV_ROUND_UP(target_mclk, dev->cfg->ts_clk); + u8tmp1 = u16tmp / 2 - 1; + u8tmp2 = DIV_ROUND_UP(u16tmp, 2) - 1; } - dev_dbg(&client->dev, - "target_mclk=%d ts_clk=%d divide_ratio=%d\n", - target_mclk, dev->cfg->ts_clk, divide_ratio); + dev_dbg(&client->dev, "target_mclk=%u ts_clk=%u ts_clk_divide_ratio=%u\n", + target_mclk, dev->cfg->ts_clk, u16tmp); - u8tmp1--; - u8tmp2--; /* u8tmp1[5:2] => fe[3:0], u8tmp1[1:0] => ea[7:6] */ - u8tmp1 &= 0x3f; /* u8tmp2[5:0] => ea[5:0] */ - u8tmp2 &= 0x3f; - - ret = regmap_bulk_read(dev->regmap, 0xfe, &u8tmp, 1); + u8tmp = (u8tmp1 >> 2) & 0x0f; + ret = regmap_update_bits(dev->regmap, 0xfe, 0x0f, u8tmp); if (ret) goto err; - - u8tmp = ((u8tmp & 0xf0) << 0) | u8tmp1 
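/*
 * Aside (illustration only): the TS clock divider computed above and
 * applied just below is one value split into two 6-bit halves and then
 * scattered across registers 0xfe and 0xea, exactly as the patch's own
 * comments describe (u8tmp1[5:2] -> fe[3:0], u8tmp1[1:0] -> ea[7:6],
 * u8tmp2[5:0] -> ea[5:0]).  A hypothetical helper showing only the
 * packing:
 */
static void example_pack_ts_divider(u8 div1, u8 div2, u8 *reg_fe_lo, u8 *reg_ea)
{
	div1 &= 0x3f;
	div2 &= 0x3f;
	*reg_fe_lo = (div1 >> 2) & 0x0f;	/* goes into fe[3:0] */
	*reg_ea = ((div1 & 0x03) << 6) | div2;	/* ea[7:6] | ea[5:0] */
}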
>> 2; - ret = regmap_write(dev->regmap, 0xfe, u8tmp); - if (ret) - goto err; - u8tmp = ((u8tmp1 & 0x03) << 6) | u8tmp2 >> 0; ret = regmap_write(dev->regmap, 0xea, u8tmp); if (ret) @@ -581,7 +569,7 @@ static int m88ds3103_set_frontend(struct dvb_frontend *fe) if (ret) goto err; - u16tmp = DIV_ROUND_CLOSEST((c->symbol_rate / 1000) << 15, dev->mclk_khz / 2); + u16tmp = DIV_ROUND_CLOSEST_ULL((u64)c->symbol_rate * 0x10000, dev->mclk); buf[0] = (u16tmp >> 0) & 0xff; buf[1] = (u16tmp >> 8) & 0xff; ret = regmap_bulk_write(dev->regmap, 0x61, buf, 2); @@ -601,13 +589,11 @@ static int m88ds3103_set_frontend(struct dvb_frontend *fe) goto err; dev_dbg(&client->dev, "carrier offset=%d\n", - (tuner_frequency - c->frequency)); - - s32tmp = 0x10000 * (tuner_frequency - c->frequency); - s32tmp = DIV_ROUND_CLOSEST(s32tmp, dev->mclk_khz); - if (s32tmp < 0) - s32tmp += 0x10000; + (tuner_frequency_khz - c->frequency)); + /* Use 32-bit calc as there is no s64 version of DIV_ROUND_CLOSEST() */ + s32tmp = 0x10000 * (tuner_frequency_khz - c->frequency); + s32tmp = DIV_ROUND_CLOSEST(s32tmp, dev->mclk / 1000); buf[0] = (s32tmp >> 0) & 0xff; buf[1] = (s32tmp >> 8) & 0xff; ret = regmap_bulk_write(dev->regmap, 0x5e, buf, 2); @@ -635,10 +621,10 @@ static int m88ds3103_init(struct dvb_frontend *fe) struct m88ds3103_dev *dev = fe->demodulator_priv; struct i2c_client *client = dev->client; struct dtv_frontend_properties *c = &fe->dtv_property_cache; - int ret, len, remaining; + int ret, len, rem; unsigned int utmp; - const struct firmware *fw = NULL; - u8 *fw_file; + const struct firmware *firmware; + const char *name; dev_dbg(&client->dev, "\n"); @@ -664,7 +650,7 @@ static int m88ds3103_init(struct dvb_frontend *fe) dev_dbg(&client->dev, "firmware=%02x\n", utmp); if (utmp) - goto skip_fw_download; + goto warm; /* global reset, global diseqc reset, golbal fec reset */ ret = regmap_write(dev->regmap, 0x07, 0xe0); @@ -679,52 +665,47 @@ static int m88ds3103_init(struct dvb_frontend *fe) m88ds3103_ops.info.name); if (dev->chip_id == M88RS6000_CHIP_ID) - fw_file = M88RS6000_FIRMWARE; + name = M88RS6000_FIRMWARE; else - fw_file = M88DS3103_FIRMWARE; + name = M88DS3103_FIRMWARE; /* request the firmware, this will block and timeout */ - ret = request_firmware(&fw, fw_file, &client->dev); + ret = request_firmware(&firmware, name, &client->dev); if (ret) { - dev_err(&client->dev, "firmware file '%s' not found\n", fw_file); + dev_err(&client->dev, "firmware file '%s' not found\n", name); goto err; } - dev_info(&client->dev, "downloading firmware from file '%s'\n", - fw_file); + dev_info(&client->dev, "downloading firmware from file '%s'\n", name); ret = regmap_write(dev->regmap, 0xb2, 0x01); if (ret) - goto error_fw_release; - - for (remaining = fw->size; remaining > 0; - remaining -= (dev->cfg->i2c_wr_max - 1)) { - len = remaining; - if (len > (dev->cfg->i2c_wr_max - 1)) - len = (dev->cfg->i2c_wr_max - 1); + goto err_release_firmware; + for (rem = firmware->size; rem > 0; rem -= (dev->cfg->i2c_wr_max - 1)) { + len = min(dev->cfg->i2c_wr_max - 1, rem); ret = regmap_bulk_write(dev->regmap, 0xb0, - &fw->data[fw->size - remaining], len); + &firmware->data[firmware->size - rem], + len); if (ret) { - dev_err(&client->dev, "firmware download failed=%d\n", + dev_err(&client->dev, "firmware download failed %d\n", ret); - goto error_fw_release; + goto err_release_firmware; } } ret = regmap_write(dev->regmap, 0xb2, 0x00); if (ret) - goto error_fw_release; + goto err_release_firmware; - release_firmware(fw); - fw = NULL; + 
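/*
 * Aside (illustration only): the value written to registers 0x61/0x62
 * above is the symbol rate expressed as a Q16 fixed-point fraction of
 * the demod master clock, round(symbol_rate / mclk * 2^16); the
 * reworked get_frontend() later inverts the same formula.  Both
 * directions as hypothetical helpers (DIV_ROUND_CLOSEST_ULL is from
 * <linux/kernel.h>):
 */
static u16 example_sr_to_reg(u32 symbol_rate, u32 mclk)
{
	return DIV_ROUND_CLOSEST_ULL((u64)symbol_rate * 0x10000, mclk);
}

static u32 example_reg_to_sr(u16 reg, u32 mclk)
{
	return DIV_ROUND_CLOSEST_ULL((u64)reg * mclk, 0x10000);
}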
release_firmware(firmware); ret = regmap_read(dev->regmap, 0xb9, &utmp); if (ret) goto err; if (!utmp) { + ret = -EINVAL; dev_info(&client->dev, "firmware did not run\n"); - ret = -EFAULT; goto err; } @@ -733,7 +714,7 @@ static int m88ds3103_init(struct dvb_frontend *fe) dev_info(&client->dev, "firmware version: %X.%X\n", (utmp >> 4) & 0xf, (utmp >> 0 & 0xf)); -skip_fw_download: +warm: /* warm state */ dev->warm = true; @@ -746,8 +727,8 @@ skip_fw_download: c->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; return 0; -error_fw_release: - release_firmware(fw); +err_release_firmware: + release_firmware(firmware); err: dev_dbg(&client->dev, "failed=%d\n", ret); return ret; @@ -952,8 +933,7 @@ static int m88ds3103_get_frontend(struct dvb_frontend *fe, if (ret) goto err; - c->symbol_rate = 1ull * ((buf[1] << 8) | (buf[0] << 0)) * - dev->mclk_khz * 1000 / 0x10000; + c->symbol_rate = DIV_ROUND_CLOSEST_ULL((u64)(buf[1] << 8 | buf[0] << 0) * dev->mclk, 0x10000); return 0; err: @@ -1119,8 +1099,9 @@ static int m88ds3103_diseqc_send_master_cmd(struct dvb_frontend *fe, #define SEND_MASTER_CMD_TIMEOUT 120 timeout = jiffies + msecs_to_jiffies(SEND_MASTER_CMD_TIMEOUT); - /* DiSEqC message typical period is 54 ms */ - usleep_range(50000, 54000); + /* DiSEqC message period is 13.5 ms per byte */ + utmp = diseqc_cmd->msg_len * 13500; + usleep_range(utmp - 4000, utmp); for (utmp = 1; !time_after(jiffies, timeout) && utmp;) { ret = regmap_read(dev->regmap, 0xa1, &utmp); @@ -1395,7 +1376,7 @@ static int m88ds3103_probe(struct i2c_client *client, dev->config.clock = pdata->clk; dev->config.i2c_wr_max = pdata->i2c_wr_max; dev->config.ts_mode = pdata->ts_mode; - dev->config.ts_clk = pdata->ts_clk; + dev->config.ts_clk = pdata->ts_clk * 1000; dev->config.ts_clk_pol = pdata->ts_clk_pol; dev->config.spec_inv = pdata->spec_inv; dev->config.agc_inv = pdata->agc_inv; @@ -1446,6 +1427,11 @@ static int m88ds3103_probe(struct i2c_client *client, goto err_kfree; } + if (!pdata->ts_clk) { + ret = -EINVAL; + goto err_kfree; + } + /* 0x29 register is defined differently for m88rs6000. 
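/*
 * Aside (illustration only): a DiSEqC byte is 8 data bits plus a parity
 * bit at 1.5 ms per bit, i.e. 13.5 ms on the wire — which is where the
 * msg_len * 13500 above comes from, replacing the old fixed 50-54 ms
 * sleep.  The wait window in isolation:
 */
static void example_diseqc_tx_wait(unsigned int msg_len)
{
	unsigned int us = msg_len * 13500;	/* 13.5 ms per byte */

	usleep_range(us - 4000, us);	/* wake a little early, then poll */
}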
*/ /* set internal tuner address to 0x21 */ if (dev->chip_id == M88RS6000_CHIP_ID) diff --git a/drivers/media/dvb-frontends/m88ds3103_priv.h b/drivers/media/dvb-frontends/m88ds3103_priv.h index d78e467295d2..07f20c269c67 100644 --- a/drivers/media/dvb-frontends/m88ds3103_priv.h +++ b/drivers/media/dvb-frontends/m88ds3103_priv.h @@ -27,7 +27,6 @@ #define M88DS3103_FIRMWARE "dvb-demod-m88ds3103.fw" #define M88RS6000_FIRMWARE "dvb-demod-m88rs6000.fw" -#define M88DS3103_MCLK_KHZ 96000 #define M88RS6000_CHIP_ID 0x74 #define M88DS3103_CHIP_ID 0x70 @@ -46,7 +45,7 @@ struct m88ds3103_dev { /* auto detect chip id to do different config */ u8 chip_id; /* main mclk is calculated for M88RS6000 dynamically */ - s32 mclk_khz; + s32 mclk; u64 post_bit_error; u64 post_bit_count; }; diff --git a/drivers/media/dvb-frontends/m88rs2000.c b/drivers/media/dvb-frontends/m88rs2000.c index a09b12313a73..ef79a4ec31e2 100644 --- a/drivers/media/dvb-frontends/m88rs2000.c +++ b/drivers/media/dvb-frontends/m88rs2000.c @@ -609,7 +609,7 @@ static int m88rs2000_set_frontend(struct dvb_frontend *fe) { struct m88rs2000_state *state = fe->demodulator_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; - enum fe_status status; + enum fe_status status = 0; int i, ret = 0; u32 tuner_freq; s16 offset = 0; diff --git a/drivers/media/dvb-frontends/mb86a20s.c b/drivers/media/dvb-frontends/mb86a20s.c index fb88dddaf3a3..41325328a22e 100644 --- a/drivers/media/dvb-frontends/mb86a20s.c +++ b/drivers/media/dvb-frontends/mb86a20s.c @@ -301,10 +301,11 @@ static int mb86a20s_read_status(struct dvb_frontend *fe, enum fe_status *status) *status = 0; - val = mb86a20s_readreg(state, 0x0a) & 0xf; + val = mb86a20s_readreg(state, 0x0a); if (val < 0) return val; + val &= 0xf; if (val >= 2) *status |= FE_HAS_SIGNAL; diff --git a/drivers/staging/media/mn88472/mn88472.c b/drivers/media/dvb-frontends/mn88472.c similarity index 58% rename from drivers/staging/media/mn88472/mn88472.c rename to drivers/media/dvb-frontends/mn88472.c index 7ea749cf19f9..18fb2df1e2bd 100644 --- a/drivers/staging/media/mn88472/mn88472.c +++ b/drivers/media/dvb-frontends/mn88472.c @@ -17,28 +17,90 @@ #include "mn88472_priv.h" static int mn88472_get_tune_settings(struct dvb_frontend *fe, - struct dvb_frontend_tune_settings *s) + struct dvb_frontend_tune_settings *s) { - s->min_delay_ms = 800; + s->min_delay_ms = 1000; return 0; } +static int mn88472_read_status(struct dvb_frontend *fe, enum fe_status *status) +{ + struct i2c_client *client = fe->demodulator_priv; + struct mn88472_dev *dev = i2c_get_clientdata(client); + struct dtv_frontend_properties *c = &fe->dtv_property_cache; + int ret; + unsigned int utmp; + + if (!dev->active) { + ret = -EAGAIN; + goto err; + } + + switch (c->delivery_system) { + case SYS_DVBT: + ret = regmap_read(dev->regmap[0], 0x7f, &utmp); + if (ret) + goto err; + if ((utmp & 0x0f) >= 0x09) + *status = FE_HAS_SIGNAL | FE_HAS_CARRIER | + FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_LOCK; + else + *status = 0; + break; + case SYS_DVBT2: + ret = regmap_read(dev->regmap[2], 0x92, &utmp); + if (ret) + goto err; + if ((utmp & 0x0f) >= 0x0d) + *status = FE_HAS_SIGNAL | FE_HAS_CARRIER | + FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_LOCK; + else if ((utmp & 0x0f) >= 0x0a) + *status = FE_HAS_SIGNAL | FE_HAS_CARRIER | + FE_HAS_VITERBI; + else if ((utmp & 0x0f) >= 0x07) + *status = FE_HAS_SIGNAL | FE_HAS_CARRIER; + else + *status = 0; + break; + case SYS_DVBC_ANNEX_A: + ret = regmap_read(dev->regmap[1], 0x84, &utmp); + if (ret) + goto err; + if ((utmp & 0x0f) 
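/*
 * Aside (illustration only): the mb86a20s change above fixes a classic
 * sign bug.  mb86a20s_readreg() returns either a negative errno or a
 * register value, so masking with 0xf *before* the error check would
 * destroy the sign and turn errors into bogus data.  Read, check, then
 * mask:
 */
static int example_read_low_nibble(struct mb86a20s_state *state)
{
	int val = mb86a20s_readreg(state, 0x0a);

	if (val < 0)
		return val;	/* propagate the errno untouched */
	return val & 0xf;	/* masking is safe only after the check */
}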
>= 0x08) + *status = FE_HAS_SIGNAL | FE_HAS_CARRIER | + FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_LOCK; + else + *status = 0; + break; + default: + ret = -EINVAL; + goto err; + } + + return 0; +err: + dev_dbg(&client->dev, "failed=%d\n", ret); + return ret; +} + static int mn88472_set_frontend(struct dvb_frontend *fe) { struct i2c_client *client = fe->demodulator_priv; struct mn88472_dev *dev = i2c_get_clientdata(client); struct dtv_frontend_properties *c = &fe->dtv_property_cache; int ret, i; - u32 if_frequency = 0; - u64 tmp; - u8 delivery_system_val, if_val[3], bw_val[7], bw_val2; + unsigned int utmp; + u32 if_frequency; + u8 buf[3], delivery_system_val, bandwidth_val, *bandwidth_vals_ptr; + u8 reg_bank0_b4_val, reg_bank0_cd_val, reg_bank0_d4_val; + u8 reg_bank0_d6_val; dev_dbg(&client->dev, - "delivery_system=%d modulation=%d frequency=%d symbol_rate=%d inversion=%d\n", - c->delivery_system, c->modulation, - c->frequency, c->symbol_rate, c->inversion); + "delivery_system=%u modulation=%u frequency=%u bandwidth_hz=%u symbol_rate=%u inversion=%d stream_id=%d\n", + c->delivery_system, c->modulation, c->frequency, + c->bandwidth_hz, c->symbol_rate, c->inversion, c->stream_id); - if (!dev->warm) { + if (!dev->active) { ret = -EAGAIN; goto err; } @@ -46,39 +108,64 @@ static int mn88472_set_frontend(struct dvb_frontend *fe) switch (c->delivery_system) { case SYS_DVBT: delivery_system_val = 0x02; + reg_bank0_b4_val = 0x00; + reg_bank0_cd_val = 0x1f; + reg_bank0_d4_val = 0x0a; + reg_bank0_d6_val = 0x48; break; case SYS_DVBT2: delivery_system_val = 0x03; + reg_bank0_b4_val = 0xf6; + reg_bank0_cd_val = 0x01; + reg_bank0_d4_val = 0x09; + reg_bank0_d6_val = 0x46; break; case SYS_DVBC_ANNEX_A: delivery_system_val = 0x04; + reg_bank0_b4_val = 0x00; + reg_bank0_cd_val = 0x17; + reg_bank0_d4_val = 0x09; + reg_bank0_d6_val = 0x48; break; default: ret = -EINVAL; goto err; } - if (c->bandwidth_hz <= 5000000) { - memcpy(bw_val, "\xe5\x99\x9a\x1b\xa9\x1b\xa9", 7); - bw_val2 = 0x03; - } else if (c->bandwidth_hz <= 6000000) { - /* IF 3570000 Hz, BW 6000000 Hz */ - memcpy(bw_val, "\xbf\x55\x55\x15\x6b\x15\x6b", 7); - bw_val2 = 0x02; - } else if (c->bandwidth_hz <= 7000000) { - /* IF 4570000 Hz, BW 7000000 Hz */ - memcpy(bw_val, "\xa4\x00\x00\x0f\x2c\x0f\x2c", 7); - bw_val2 = 0x01; - } else if (c->bandwidth_hz <= 8000000) { - /* IF 4570000 Hz, BW 8000000 Hz */ - memcpy(bw_val, "\x8f\x80\x00\x08\xee\x08\xee", 7); - bw_val2 = 0x00; - } else { - ret = -EINVAL; - goto err; + switch (c->delivery_system) { + case SYS_DVBT: + case SYS_DVBT2: + switch (c->bandwidth_hz) { + case 5000000: + bandwidth_vals_ptr = "\xe5\x99\x9a\x1b\xa9\x1b\xa9"; + bandwidth_val = 0x03; + break; + case 6000000: + bandwidth_vals_ptr = "\xbf\x55\x55\x15\x6b\x15\x6b"; + bandwidth_val = 0x02; + break; + case 7000000: + bandwidth_vals_ptr = "\xa4\x00\x00\x0f\x2c\x0f\x2c"; + bandwidth_val = 0x01; + break; + case 8000000: + bandwidth_vals_ptr = "\x8f\x80\x00\x08\xee\x08\xee"; + bandwidth_val = 0x00; + break; + default: + ret = -EINVAL; + goto err; + } + break; + case SYS_DVBC_ANNEX_A: + bandwidth_vals_ptr = NULL; + bandwidth_val = 0x00; + break; + default: + break; } - /* program tuner */ + /* Program tuner */ if (fe->ops.tuner_ops.set_params) { ret = fe->ops.tuner_ops.set_params(fe); if (ret) @@ -91,20 +178,10 @@ static int mn88472_set_frontend(struct dvb_frontend *fe) goto err; dev_dbg(&client->dev, "get_if_frequency=%d\n", if_frequency); - } - - /* Calculate IF registers ( (1<<24)*IF / Xtal ) */ - tmp = div_u64(if_frequency * (u64)(1<<24) + 
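/*
 * Aside (illustration only): the new mn88472_read_status() above maps
 * the demod's 4-bit sync-state onto cumulative DVBv5 status flags; the
 * DVB-T2 ladder in isolation, with an invented helper name:
 */
static enum fe_status example_t2_sync_to_status(unsigned int sync_state)
{
	if (sync_state >= 0x0d)
		return FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_VITERBI |
		       FE_HAS_SYNC | FE_HAS_LOCK;
	if (sync_state >= 0x0a)
		return FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_VITERBI;
	if (sync_state >= 0x07)
		return FE_HAS_SIGNAL | FE_HAS_CARRIER;
	return 0;
}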
(dev->xtal / 2), - dev->xtal); - if_val[0] = (tmp >> 16) & 0xff; - if_val[1] = (tmp >> 8) & 0xff; - if_val[2] = (tmp >> 0) & 0xff; - - ret = regmap_write(dev->regmap[2], 0xfb, 0x13); - ret = regmap_write(dev->regmap[2], 0xef, 0x13); - ret = regmap_write(dev->regmap[2], 0xf9, 0x13); - if (ret) + } else { + ret = -EINVAL; goto err; + } ret = regmap_write(dev->regmap[2], 0x00, 0x66); if (ret) @@ -118,157 +195,81 @@ static int mn88472_set_frontend(struct dvb_frontend *fe) ret = regmap_write(dev->regmap[2], 0x03, delivery_system_val); if (ret) goto err; - ret = regmap_write(dev->regmap[2], 0x04, bw_val2); + ret = regmap_write(dev->regmap[2], 0x04, bandwidth_val); if (ret) goto err; - for (i = 0; i < sizeof(if_val); i++) { - ret = regmap_write(dev->regmap[2], 0x10 + i, if_val[i]); + /* IF */ + utmp = DIV_ROUND_CLOSEST_ULL((u64)if_frequency * 0x1000000, dev->clk); + buf[0] = (utmp >> 16) & 0xff; + buf[1] = (utmp >> 8) & 0xff; + buf[2] = (utmp >> 0) & 0xff; + for (i = 0; i < 3; i++) { + ret = regmap_write(dev->regmap[2], 0x10 + i, buf[i]); if (ret) goto err; } - for (i = 0; i < sizeof(bw_val); i++) { - ret = regmap_write(dev->regmap[2], 0x13 + i, bw_val[i]); - if (ret) - goto err; + /* Bandwidth */ + if (bandwidth_vals_ptr) { + for (i = 0; i < 7; i++) { + ret = regmap_write(dev->regmap[2], 0x13 + i, + bandwidth_vals_ptr[i]); + if (ret) + goto err; + } } + ret = regmap_write(dev->regmap[0], 0xb4, reg_bank0_b4_val); + if (ret) + goto err; + ret = regmap_write(dev->regmap[0], 0xcd, reg_bank0_cd_val); + if (ret) + goto err; + ret = regmap_write(dev->regmap[0], 0xd4, reg_bank0_d4_val); + if (ret) + goto err; + ret = regmap_write(dev->regmap[0], 0xd6, reg_bank0_d6_val); + if (ret) + goto err; + switch (c->delivery_system) { case SYS_DVBT: ret = regmap_write(dev->regmap[0], 0x07, 0x26); - ret = regmap_write(dev->regmap[0], 0xb0, 0x0a); - ret = regmap_write(dev->regmap[0], 0xb4, 0x00); - ret = regmap_write(dev->regmap[0], 0xcd, 0x1f); - ret = regmap_write(dev->regmap[0], 0xd4, 0x0a); - ret = regmap_write(dev->regmap[0], 0xd6, 0x48); + if (ret) + goto err; ret = regmap_write(dev->regmap[0], 0x00, 0xba); + if (ret) + goto err; ret = regmap_write(dev->regmap[0], 0x01, 0x13); if (ret) goto err; break; case SYS_DVBT2: ret = regmap_write(dev->regmap[2], 0x2b, 0x13); + if (ret) + goto err; ret = regmap_write(dev->regmap[2], 0x4f, 0x05); + if (ret) + goto err; ret = regmap_write(dev->regmap[1], 0xf6, 0x05); - ret = regmap_write(dev->regmap[0], 0xb0, 0x0a); - ret = regmap_write(dev->regmap[0], 0xb4, 0xf6); - ret = regmap_write(dev->regmap[0], 0xcd, 0x01); - ret = regmap_write(dev->regmap[0], 0xd4, 0x09); - ret = regmap_write(dev->regmap[0], 0xd6, 0x46); - ret = regmap_write(dev->regmap[2], 0x30, 0x80); - ret = regmap_write(dev->regmap[2], 0x32, 0x00); if (ret) goto err; - break; - case SYS_DVBC_ANNEX_A: - ret = regmap_write(dev->regmap[0], 0xb0, 0x0b); - ret = regmap_write(dev->regmap[0], 0xb4, 0x00); - ret = regmap_write(dev->regmap[0], 0xcd, 0x17); - ret = regmap_write(dev->regmap[0], 0xd4, 0x09); - ret = regmap_write(dev->regmap[0], 0xd6, 0x48); - ret = regmap_write(dev->regmap[1], 0x00, 0xb0); + ret = regmap_write(dev->regmap[2], 0x32, c->stream_id); if (ret) goto err; break; - default: - ret = -EINVAL; - goto err; - } - - ret = regmap_write(dev->regmap[0], 0x46, 0x00); - ret = regmap_write(dev->regmap[0], 0xae, 0x00); - - switch (dev->ts_mode) { - case SERIAL_TS_MODE: - ret = regmap_write(dev->regmap[2], 0x08, 0x1d); - break; - case PARALLEL_TS_MODE: - ret = regmap_write(dev->regmap[2], 0x08, 0x00); + case 
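/*
 * Aside (illustration only): the removed and the new IF code above
 * compute the same quantity — round(IF / xtal * 2^24) — but the rewrite
 * replaces the open-coded "+ xtal/2, then div_u64" rounding with
 * DIV_ROUND_CLOSEST_ULL and writes the 24-bit result big-endian to
 * registers 0x10..0x12:
 */
static void example_mn88472_if_regs(u32 if_frequency, u32 clk, u8 buf[3])
{
	u32 utmp = DIV_ROUND_CLOSEST_ULL((u64)if_frequency * 0x1000000, clk);

	buf[0] = (utmp >> 16) & 0xff;
	buf[1] = (utmp >> 8) & 0xff;
	buf[2] = utmp & 0xff;
}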
SYS_DVBC_ANNEX_A: break; default: - dev_dbg(&client->dev, "ts_mode error: %d\n", dev->ts_mode); - ret = -EINVAL; - goto err; - } - - switch (dev->ts_clock) { - case VARIABLE_TS_CLOCK: - ret = regmap_write(dev->regmap[0], 0xd9, 0xe3); break; - case FIXED_TS_CLOCK: - ret = regmap_write(dev->regmap[0], 0xd9, 0xe1); - break; - default: - dev_dbg(&client->dev, "ts_clock error: %d\n", dev->ts_clock); - ret = -EINVAL; - goto err; } - /* Reset demod */ + /* Reset FSM */ ret = regmap_write(dev->regmap[2], 0xf8, 0x9f); if (ret) goto err; - dev->delivery_system = c->delivery_system; - - return 0; -err: - dev_dbg(&client->dev, "failed=%d\n", ret); - return ret; -} - -static int mn88472_read_status(struct dvb_frontend *fe, enum fe_status *status) -{ - struct i2c_client *client = fe->demodulator_priv; - struct mn88472_dev *dev = i2c_get_clientdata(client); - struct dtv_frontend_properties *c = &fe->dtv_property_cache; - int ret; - unsigned int utmp; - int lock = 0; - - *status = 0; - - if (!dev->warm) { - ret = -EAGAIN; - goto err; - } - - switch (c->delivery_system) { - case SYS_DVBT: - ret = regmap_read(dev->regmap[0], 0x7F, &utmp); - if (ret) - goto err; - if ((utmp & 0xF) >= 0x09) - lock = 1; - break; - case SYS_DVBT2: - ret = regmap_read(dev->regmap[2], 0x92, &utmp); - if (ret) - goto err; - if ((utmp & 0xF) >= 0x07) - *status |= FE_HAS_SIGNAL; - if ((utmp & 0xF) >= 0x0a) - *status |= FE_HAS_CARRIER; - if ((utmp & 0xF) >= 0x0d) - *status |= FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_LOCK; - break; - case SYS_DVBC_ANNEX_A: - ret = regmap_read(dev->regmap[1], 0x84, &utmp); - if (ret) - goto err; - if ((utmp & 0xF) >= 0x08) - lock = 1; - break; - default: - ret = -EINVAL; - goto err; - } - - if (lock) - *status = FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_VITERBI | - FE_HAS_SYNC | FE_HAS_LOCK; - return 0; err: dev_dbg(&client->dev, "failed=%d\n", ret); @@ -279,93 +280,107 @@ static int mn88472_init(struct dvb_frontend *fe) { struct i2c_client *client = fe->demodulator_priv; struct mn88472_dev *dev = i2c_get_clientdata(client); - int ret, len, remaining; - const struct firmware *fw = NULL; - u8 *fw_file = MN88472_FIRMWARE; - unsigned int tmp; + int ret, len, rem; + unsigned int utmp; + const struct firmware *firmware; + const char *name = MN88472_FIRMWARE; dev_dbg(&client->dev, "\n"); - /* set cold state by default */ - dev->warm = false; - - /* power on */ + /* Power up */ ret = regmap_write(dev->regmap[2], 0x05, 0x00); if (ret) goto err; - - ret = regmap_bulk_write(dev->regmap[2], 0x0b, "\x00\x00", 2); + ret = regmap_write(dev->regmap[2], 0x0b, 0x00); if (ret) goto err; - - /* check if firmware is already running */ - ret = regmap_read(dev->regmap[0], 0xf5, &tmp); + ret = regmap_write(dev->regmap[2], 0x0c, 0x00); if (ret) goto err; - if (!(tmp & 0x1)) { - dev_info(&client->dev, "firmware already running\n"); - dev->warm = true; - return 0; - } + /* Check if firmware is already running */ + ret = regmap_read(dev->regmap[0], 0xf5, &utmp); + if (ret) + goto err; + if (!(utmp & 0x01)) + goto warm; - /* request the firmware, this will block and timeout */ - ret = request_firmware(&fw, fw_file, &client->dev); + ret = request_firmware(&firmware, name, &client->dev); if (ret) { - dev_err(&client->dev, "firmare file '%s' not found\n", - fw_file); + dev_err(&client->dev, "firmware file '%s' not found\n", name); goto err; } - dev_info(&client->dev, "downloading firmware from file '%s'\n", - fw_file); + dev_info(&client->dev, "downloading firmware from file '%s'\n", name); ret = regmap_write(dev->regmap[0], 0xf5, 0x03); 
if (ret) - goto firmware_release; - - for (remaining = fw->size; remaining > 0; - remaining -= (dev->i2c_wr_max - 1)) { - len = remaining; - if (len > (dev->i2c_wr_max - 1)) - len = dev->i2c_wr_max - 1; + goto err_release_firmware; + for (rem = firmware->size; rem > 0; rem -= (dev->i2c_write_max - 1)) { + len = min(dev->i2c_write_max - 1, rem); ret = regmap_bulk_write(dev->regmap[0], 0xf6, - &fw->data[fw->size - remaining], len); + &firmware->data[firmware->size - rem], + len); if (ret) { - dev_err(&client->dev, - "firmware download failed=%d\n", ret); - goto firmware_release; + dev_err(&client->dev, "firmware download failed %d\n", + ret); + goto err_release_firmware; } } - /* parity check of firmware */ - ret = regmap_read(dev->regmap[0], 0xf8, &tmp); - if (ret) { - dev_err(&client->dev, - "parity reg read failed=%d\n", ret); - goto firmware_release; - } - if (tmp & 0x10) { - dev_err(&client->dev, - "firmware parity check failed=0x%x\n", tmp); - goto firmware_release; + /* Parity check of firmware */ + ret = regmap_read(dev->regmap[0], 0xf8, &utmp); + if (ret) + goto err_release_firmware; + if (utmp & 0x10) { + ret = -EINVAL; + dev_err(&client->dev, "firmware did not run\n"); + goto err_release_firmware; } - dev_err(&client->dev, "firmware parity check succeeded=0x%x\n", tmp); ret = regmap_write(dev->regmap[0], 0xf5, 0x00); if (ret) - goto firmware_release; + goto err_release_firmware; + + release_firmware(firmware); +warm: + /* TS config */ + switch (dev->ts_mode) { + case SERIAL_TS_MODE: + utmp = 0x1d; + break; + case PARALLEL_TS_MODE: + utmp = 0x00; + break; + default: + ret = -EINVAL; + goto err; + } + ret = regmap_write(dev->regmap[2], 0x08, utmp); + if (ret) + goto err; - release_firmware(fw); - fw = NULL; + switch (dev->ts_clk) { + case VARIABLE_TS_CLOCK: + utmp = 0xe3; + break; + case FIXED_TS_CLOCK: + utmp = 0xe1; + break; + default: + ret = -EINVAL; + goto err; + } + ret = regmap_write(dev->regmap[0], 0xd9, utmp); + if (ret) + goto err; - /* warm state */ - dev->warm = true; + dev->active = true; return 0; -firmware_release: - release_firmware(fw); +err_release_firmware: + release_firmware(firmware); err: dev_dbg(&client->dev, "failed=%d\n", ret); return ret; @@ -379,18 +394,17 @@ static int mn88472_sleep(struct dvb_frontend *fe) dev_dbg(&client->dev, "\n"); - /* power off */ + /* Power down */ + ret = regmap_write(dev->regmap[2], 0x0c, 0x30); + if (ret) + goto err; ret = regmap_write(dev->regmap[2], 0x0b, 0x30); - if (ret) goto err; - ret = regmap_write(dev->regmap[2], 0x05, 0x3e); if (ret) goto err; - dev->delivery_system = SYS_UNDEFINED; - return 0; err: dev_dbg(&client->dev, "failed=%d\n", ret); @@ -434,10 +448,19 @@ static struct dvb_frontend_ops mn88472_ops = { .read_status = mn88472_read_status, }; +static struct dvb_frontend *mn88472_get_dvb_frontend(struct i2c_client *client) +{ + struct mn88472_dev *dev = i2c_get_clientdata(client); + + dev_dbg(&client->dev, "\n"); + + return &dev->fe; +} + static int mn88472_probe(struct i2c_client *client, - const struct i2c_device_id *id) + const struct i2c_device_id *id) { - struct mn88472_config *config = client->dev.platform_data; + struct mn88472_config *pdata = client->dev.platform_data; struct mn88472_dev *dev; int ret; unsigned int utmp; @@ -448,23 +471,16 @@ static int mn88472_probe(struct i2c_client *client, dev_dbg(&client->dev, "\n"); - /* Caller really need to provide pointer for frontend we create. 
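/*
 * Aside (a sketch, not the patch itself): the download loop above is
 * the standard chunked-firmware pattern — each I2C transfer can carry
 * at most i2c_write_max bytes, one of which is the register address, so
 * the payload advances in (max - 1)-byte slices.  Skeleton of the loop;
 * the regmap handle and register 0xf6 are the driver's own:
 */
static int example_fw_download(struct regmap *map, const struct firmware *fw,
			       int wr_max)
{
	int ret, len, rem;

	for (rem = fw->size; rem > 0; rem -= (wr_max - 1)) {
		len = min(wr_max - 1, rem);
		ret = regmap_bulk_write(map, 0xf6,
					&fw->data[fw->size - rem], len);
		if (ret)
			return ret;
	}
	return 0;
}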
*/ - if (config->fe == NULL) { - dev_err(&client->dev, "frontend pointer not defined\n"); - ret = -EINVAL; - goto err; - } - dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { ret = -ENOMEM; goto err; } - dev->i2c_wr_max = config->i2c_wr_max; - dev->xtal = config->xtal; - dev->ts_mode = config->ts_mode; - dev->ts_clock = config->ts_clock; + dev->i2c_write_max = pdata->i2c_wr_max ? pdata->i2c_wr_max : ~0; + dev->clk = pdata->xtal; + dev->ts_mode = pdata->ts_mode; + dev->ts_clk = pdata->ts_clock; dev->client[0] = client; dev->regmap[0] = regmap_init_i2c(dev->client[0], ®map_config); if (IS_ERR(dev->regmap[0])) { @@ -472,15 +488,25 @@ static int mn88472_probe(struct i2c_client *client, goto err_kfree; } - /* check demod answers to I2C */ - ret = regmap_read(dev->regmap[0], 0x00, &utmp); + /* Check demod answers with correct chip id */ + ret = regmap_read(dev->regmap[0], 0xff, &utmp); if (ret) goto err_regmap_0_regmap_exit; + dev_dbg(&client->dev, "chip id=%02x\n", utmp); + + if (utmp != 0x02) { + ret = -ENODEV; + goto err_regmap_0_regmap_exit; + } + /* - * Chip has three I2C addresses for different register pages. Used + * Chip has three I2C addresses for different register banks. Used * addresses are 0x18, 0x1a and 0x1c. We register two dummy clients, - * 0x1a and 0x1c, in order to get own I2C client for each register page. + * 0x1a and 0x1c, in order to get own I2C client for each register bank. + * + * Also, register bank 2 do not support sequential I/O. Only single + * register write or read is allowed to that bank. */ dev->client[1] = i2c_new_dummy(client->adapter, 0x1a); if (!dev->client[1]) { @@ -510,15 +536,25 @@ static int mn88472_probe(struct i2c_client *client, } i2c_set_clientdata(dev->client[2], dev); - /* create dvb_frontend */ + /* Sleep because chip is active by default */ + ret = regmap_write(dev->regmap[2], 0x05, 0x3e); + if (ret) + goto err_regmap_2_regmap_exit; + + /* Create dvb frontend */ memcpy(&dev->fe.ops, &mn88472_ops, sizeof(struct dvb_frontend_ops)); dev->fe.demodulator_priv = client; - *config->fe = &dev->fe; + *pdata->fe = &dev->fe; i2c_set_clientdata(client, dev); - dev_info(&client->dev, "Panasonic MN88472 successfully attached\n"); - return 0; + /* Setup callbacks */ + pdata->get_dvb_frontend = mn88472_get_dvb_frontend; + dev_info(&client->dev, "Panasonic MN88472 successfully identified\n"); + + return 0; +err_regmap_2_regmap_exit: + regmap_exit(dev->regmap[2]); err_client_2_i2c_unregister_device: i2c_unregister_device(dev->client[2]); err_regmap_1_regmap_exit: @@ -561,11 +597,12 @@ MODULE_DEVICE_TABLE(i2c, mn88472_id_table); static struct i2c_driver mn88472_driver = { .driver = { - .name = "mn88472", + .name = "mn88472", + .suppress_bind_attrs = true, }, - .probe = mn88472_probe, - .remove = mn88472_remove, - .id_table = mn88472_id_table, + .probe = mn88472_probe, + .remove = mn88472_remove, + .id_table = mn88472_id_table, }; module_i2c_driver(mn88472_driver); diff --git a/drivers/media/dvb-frontends/mn88472.h b/drivers/media/dvb-frontends/mn88472.h index 095294d292f3..323632523876 100644 --- a/drivers/media/dvb-frontends/mn88472.h +++ b/drivers/media/dvb-frontends/mn88472.h @@ -19,23 +19,33 @@ #include -enum ts_clock { - VARIABLE_TS_CLOCK, - FIXED_TS_CLOCK, -}; +/** + * struct mn88472_config - Platform data for the mn88472 driver + * @xtal: Clock frequency. + * @ts_mode: TS mode. + * @ts_clock: TS clock config. + * @i2c_wr_max: Max number of bytes driver writes to I2C at once. + * @get_dvb_frontend: Get DVB frontend. 
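/*
 * Aside (a sketch under assumptions, not the patch itself): the probe
 * code above models one chip with three I2C register banks at 0x18,
 * 0x1a and 0x1c by registering dummy clients for the extra addresses
 * and giving each bank its own regmap.  The per-bank setup condensed
 * into a hypothetical helper (regmap_config is the driver's static
 * config):
 */
static int example_map_bank(struct mn88472_dev *dev,
			    struct i2c_client *client, int n, u16 addr)
{
	dev->client[n] = i2c_new_dummy(client->adapter, addr);
	if (!dev->client[n])
		return -ENODEV;
	dev->regmap[n] = regmap_init_i2c(dev->client[n], &regmap_config);
	if (IS_ERR(dev->regmap[n])) {
		i2c_unregister_device(dev->client[n]);
		return PTR_ERR(dev->regmap[n]);
	}
	i2c_set_clientdata(dev->client[n], dev);
	return 0;
}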
+ */ -enum ts_mode { - SERIAL_TS_MODE, - PARALLEL_TS_MODE, -}; +/* Define old names for backward compatibility */ +#define VARIABLE_TS_CLOCK MN88472_TS_CLK_VARIABLE +#define FIXED_TS_CLOCK MN88472_TS_CLK_FIXED +#define SERIAL_TS_MODE MN88472_TS_MODE_SERIAL +#define PARALLEL_TS_MODE MN88472_TS_MODE_PARALLEL struct mn88472_config { - /* - * Max num of bytes given I2C adapter could write at once. - * Default: none - */ - u16 i2c_wr_max; + unsigned int xtal; + +#define MN88472_TS_MODE_SERIAL 0 +#define MN88472_TS_MODE_PARALLEL 1 + int ts_mode; +#define MN88472_TS_CLK_FIXED 0 +#define MN88472_TS_CLK_VARIABLE 1 + int ts_clock; + + u16 i2c_wr_max; /* Everything after that is returned by the driver. */ @@ -43,14 +53,7 @@ struct mn88472_config { * DVB frontend. */ struct dvb_frontend **fe; - - /* - * Xtal frequency. - * Hz - */ - u32 xtal; - int ts_mode; - int ts_clock; + struct dvb_frontend* (*get_dvb_frontend)(struct i2c_client *); }; #endif diff --git a/drivers/staging/media/mn88472/mn88472_priv.h b/drivers/media/dvb-frontends/mn88472_priv.h similarity index 88% rename from drivers/staging/media/mn88472/mn88472_priv.h rename to drivers/media/dvb-frontends/mn88472_priv.h index 1a0de9e46b66..cdf2597a25d1 100644 --- a/drivers/staging/media/mn88472/mn88472_priv.h +++ b/drivers/media/dvb-frontends/mn88472_priv.h @@ -28,12 +28,11 @@ struct mn88472_dev { struct i2c_client *client[3]; struct regmap *regmap[3]; struct dvb_frontend fe; - u16 i2c_wr_max; - enum fe_delivery_system delivery_system; - bool warm; /* FW running */ - u32 xtal; - int ts_mode; - int ts_clock; + u16 i2c_write_max; + unsigned int clk; + unsigned int active:1; + unsigned int ts_mode:1; + unsigned int ts_clk:1; }; #endif diff --git a/drivers/media/dvb-frontends/mn88473.c b/drivers/media/dvb-frontends/mn88473.c index 6c5d592161d4..451974a1d7ed 100644 --- a/drivers/media/dvb-frontends/mn88473.c +++ b/drivers/media/dvb-frontends/mn88473.c @@ -330,7 +330,7 @@ static int mn88473_init(struct dvb_frontend *fe) /* Request the firmware, this will block and timeout */ ret = request_firmware(&fw, name, &client->dev); if (ret) { - dev_err(&client->dev, "firmare file '%s' not found\n", name); + dev_err(&client->dev, "firmware file '%s' not found\n", name); goto err; } @@ -536,7 +536,7 @@ static int mn88473_probe(struct i2c_client *client, /* Sleep because chip is active by default */ ret = regmap_write(dev->regmap[2], 0x05, 0x3e); if (ret) - goto err_client_2_i2c_unregister_device; + goto err_regmap_2_regmap_exit; /* Create dvb frontend */ memcpy(&dev->frontend.ops, &mn88473_ops, sizeof(dev->frontend.ops)); @@ -547,7 +547,8 @@ static int mn88473_probe(struct i2c_client *client, dev_info(&client->dev, "Panasonic MN88473 successfully identified\n"); return 0; - +err_regmap_2_regmap_exit: + regmap_exit(dev->regmap[2]); err_client_2_i2c_unregister_device: i2c_unregister_device(dev->client[2]); err_regmap_1_regmap_exit: diff --git a/drivers/media/dvb-frontends/rtl2830.c b/drivers/media/dvb-frontends/rtl2830.c index d25d1e0cd4ca..87226056f226 100644 --- a/drivers/media/dvb-frontends/rtl2830.c +++ b/drivers/media/dvb-frontends/rtl2830.c @@ -135,8 +135,6 @@ static int rtl2830_init(struct dvb_frontend *fe) c->post_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->post_bit_count.len = 1; c->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; - /* start statistics polling */ - schedule_delayed_work(&dev->stat_work, msecs_to_jiffies(2000)); dev->sleeping = false; @@ -152,8 +150,6 @@ static int rtl2830_sleep(struct dvb_frontend *fe) struct rtl2830_dev 
*dev = i2c_get_clientdata(client); dev->sleeping = true; - /* stop statistics polling */ - cancel_delayed_work_sync(&dev->stat_work); dev->fe_status = 0; return 0; @@ -396,8 +392,10 @@ static int rtl2830_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct i2c_client *client = fe->demodulator_priv; struct rtl2830_dev *dev = i2c_get_clientdata(client); - int ret; - u8 u8tmp; + struct dtv_frontend_properties *c = &dev->fe.dtv_property_cache; + int ret, stmp; + unsigned int utmp; + u8 u8tmp, buf[2]; *status = 0; @@ -419,6 +417,89 @@ static int rtl2830_read_status(struct dvb_frontend *fe, enum fe_status *status) dev->fe_status = *status; + /* Signal strength */ + if (dev->fe_status & FE_HAS_SIGNAL) { + /* Read IF AGC */ + ret = rtl2830_bulk_read(client, 0x359, buf, 2); + if (ret) + goto err; + + stmp = buf[0] << 8 | buf[1] << 0; + stmp = sign_extend32(stmp, 13); + utmp = clamp_val(-4 * stmp + 32767, 0x0000, 0xffff); + + dev_dbg(&client->dev, "IF AGC=%d\n", stmp); + + c->strength.stat[0].scale = FE_SCALE_RELATIVE; + c->strength.stat[0].uvalue = utmp; + } else { + c->strength.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + } + + /* CNR */ + if (dev->fe_status & FE_HAS_VITERBI) { + unsigned int hierarchy, constellation; + #define CONSTELLATION_NUM 3 + #define HIERARCHY_NUM 4 + static const u32 constant[CONSTELLATION_NUM][HIERARCHY_NUM] = { + {70705899, 70705899, 70705899, 70705899}, + {82433173, 82433173, 87483115, 94445660}, + {92888734, 92888734, 95487525, 99770748}, + }; + + ret = rtl2830_bulk_read(client, 0x33c, &u8tmp, 1); + if (ret) + goto err; + + constellation = (u8tmp >> 2) & 0x03; /* [3:2] */ + if (constellation > CONSTELLATION_NUM - 1) + goto err; + + hierarchy = (u8tmp >> 4) & 0x07; /* [6:4] */ + if (hierarchy > HIERARCHY_NUM - 1) + goto err; + + ret = rtl2830_bulk_read(client, 0x40c, buf, 2); + if (ret) + goto err; + + utmp = buf[0] << 8 | buf[1] << 0; + if (utmp) + stmp = (constant[constellation][hierarchy] - + intlog10(utmp)) / ((1 << 24) / 10000); + else + stmp = 0; + + dev_dbg(&client->dev, "CNR raw=%u\n", utmp); + + c->cnr.stat[0].scale = FE_SCALE_DECIBEL; + c->cnr.stat[0].svalue = stmp; + } else { + c->cnr.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + } + + /* BER */ + if (dev->fe_status & FE_HAS_LOCK) { + ret = rtl2830_bulk_read(client, 0x34e, buf, 2); + if (ret) + goto err; + + utmp = buf[0] << 8 | buf[1] << 0; + dev->post_bit_error += utmp; + dev->post_bit_count += 1000000; + + dev_dbg(&client->dev, "BER errors=%u total=1000000\n", utmp); + + c->post_bit_error.stat[0].scale = FE_SCALE_COUNTER; + c->post_bit_error.stat[0].uvalue = dev->post_bit_error; + c->post_bit_count.stat[0].scale = FE_SCALE_COUNTER; + c->post_bit_count.stat[0].uvalue = dev->post_bit_count; + } else { + c->post_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + c->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + } + + return ret; err: dev_dbg(&client->dev, "failed=%d\n", ret); @@ -503,109 +584,6 @@ static struct dvb_frontend_ops rtl2830_ops = { .read_signal_strength = rtl2830_read_signal_strength, }; -static void rtl2830_stat_work(struct work_struct *work) -{ - struct rtl2830_dev *dev = container_of(work, struct rtl2830_dev, stat_work.work); - struct i2c_client *client = dev->client; - struct dtv_frontend_properties *c = &dev->fe.dtv_property_cache; - int ret, tmp; - u8 u8tmp, buf[2]; - u16 u16tmp; - - dev_dbg(&client->dev, "\n"); - - /* signal strength */ - if (dev->fe_status & FE_HAS_SIGNAL) { - struct {signed int x:14; } s; - - /* read IF AGC */ - ret = rtl2830_bulk_read(client, 0x359, 
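/*
 * Aside (illustration only): the IF AGC readout above is a 14-bit
 * two's-complement value.  sign_extend32(x, 13) — bit 13 is the sign
 * bit, helper from <linux/bitops.h> — replaces the
 * "struct { signed int x:14; } s;" bitfield trick removed just below,
 * and the result is mapped linearly onto the DVBv5 0x0000..0xffff
 * relative scale:
 */
static u16 example_agc_to_strength(u8 hi, u8 lo)
{
	int agc = sign_extend32(hi << 8 | lo, 13);

	return clamp_val(-4 * agc + 32767, 0x0000, 0xffff);
}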
buf, 2); - if (ret) - goto err; - - u16tmp = buf[0] << 8 | buf[1] << 0; - u16tmp &= 0x3fff; /* [13:0] */ - tmp = s.x = u16tmp; /* 14-bit bin to 2 complement */ - u16tmp = clamp_val(-4 * tmp + 32767, 0x0000, 0xffff); - - dev_dbg(&client->dev, "IF AGC=%d\n", tmp); - - c->strength.stat[0].scale = FE_SCALE_RELATIVE; - c->strength.stat[0].uvalue = u16tmp; - } else { - c->strength.stat[0].scale = FE_SCALE_NOT_AVAILABLE; - } - - /* CNR */ - if (dev->fe_status & FE_HAS_VITERBI) { - unsigned hierarchy, constellation; - #define CONSTELLATION_NUM 3 - #define HIERARCHY_NUM 4 - static const u32 constant[CONSTELLATION_NUM][HIERARCHY_NUM] = { - {70705899, 70705899, 70705899, 70705899}, - {82433173, 82433173, 87483115, 94445660}, - {92888734, 92888734, 95487525, 99770748}, - }; - - ret = rtl2830_bulk_read(client, 0x33c, &u8tmp, 1); - if (ret) - goto err; - - constellation = (u8tmp >> 2) & 0x03; /* [3:2] */ - if (constellation > CONSTELLATION_NUM - 1) - goto err_schedule_delayed_work; - - hierarchy = (u8tmp >> 4) & 0x07; /* [6:4] */ - if (hierarchy > HIERARCHY_NUM - 1) - goto err_schedule_delayed_work; - - ret = rtl2830_bulk_read(client, 0x40c, buf, 2); - if (ret) - goto err; - - u16tmp = buf[0] << 8 | buf[1] << 0; - if (u16tmp) - tmp = (constant[constellation][hierarchy] - - intlog10(u16tmp)) / ((1 << 24) / 10000); - else - tmp = 0; - - dev_dbg(&client->dev, "CNR raw=%u\n", u16tmp); - - c->cnr.stat[0].scale = FE_SCALE_DECIBEL; - c->cnr.stat[0].svalue = tmp; - } else { - c->cnr.stat[0].scale = FE_SCALE_NOT_AVAILABLE; - } - - /* BER */ - if (dev->fe_status & FE_HAS_LOCK) { - ret = rtl2830_bulk_read(client, 0x34e, buf, 2); - if (ret) - goto err; - - u16tmp = buf[0] << 8 | buf[1] << 0; - dev->post_bit_error += u16tmp; - dev->post_bit_count += 1000000; - - dev_dbg(&client->dev, "BER errors=%u total=1000000\n", u16tmp); - - c->post_bit_error.stat[0].scale = FE_SCALE_COUNTER; - c->post_bit_error.stat[0].uvalue = dev->post_bit_error; - c->post_bit_count.stat[0].scale = FE_SCALE_COUNTER; - c->post_bit_count.stat[0].uvalue = dev->post_bit_count; - } else { - c->post_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; - c->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; - } - -err_schedule_delayed_work: - schedule_delayed_work(&dev->stat_work, msecs_to_jiffies(2000)); - return; -err: - dev_dbg(&client->dev, "failed=%d\n", ret); -} - static int rtl2830_pid_filter_ctrl(struct dvb_frontend *fe, int onoff) { struct i2c_client *client = fe->demodulator_priv; @@ -851,7 +829,6 @@ static int rtl2830_probe(struct i2c_client *client, dev->client = client; dev->pdata = client->dev.platform_data; dev->sleeping = true; - INIT_DELAYED_WORK(&dev->stat_work, rtl2830_stat_work); dev->regmap = regmap_init(&client->dev, ®map_bus, client, ®map_config); if (IS_ERR(dev->regmap)) { @@ -904,9 +881,6 @@ static int rtl2830_remove(struct i2c_client *client) dev_dbg(&client->dev, "\n"); - /* stop statistics polling */ - cancel_delayed_work_sync(&dev->stat_work); - i2c_mux_del_adapters(dev->muxc); regmap_exit(dev->regmap); kfree(dev); @@ -922,7 +896,8 @@ MODULE_DEVICE_TABLE(i2c, rtl2830_id_table); static struct i2c_driver rtl2830_driver = { .driver = { - .name = "rtl2830", + .name = "rtl2830", + .suppress_bind_attrs = true, }, .probe = rtl2830_probe, .remove = rtl2830_remove, diff --git a/drivers/media/dvb-frontends/rtl2830_priv.h b/drivers/media/dvb-frontends/rtl2830_priv.h index da4909543da2..8ec4721d79ac 100644 --- a/drivers/media/dvb-frontends/rtl2830_priv.h +++ b/drivers/media/dvb-frontends/rtl2830_priv.h @@ -24,6 +24,7 @@ #include 
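/*
 * Aside (illustration only): in the CNR math above, intlog10() (from
 * dvb_math.h) returns log10(x) scaled by 2^24, so
 * (constant - intlog10(x)) is 2^24 * log10(K / x), and dividing by
 * ((1 << 24) / 10000) leaves 10000 * log10(K / x).  Since
 * dB = 10 * log10, that is millidecibels — the 0.001 dB unit that
 * FE_SCALE_DECIBEL expects.  In isolation:
 */
static s32 example_cnr_mdb(u32 constant, u16 raw)
{
	if (!raw)
		return 0;
	return ((s32)constant - (s32)intlog10(raw)) / ((1 << 24) / 10000);
}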
#include #include +#include struct rtl2830_dev { struct rtl2830_platform_data *pdata; @@ -33,7 +34,6 @@ struct rtl2830_dev { struct dvb_frontend fe; bool sleeping; unsigned long filters; - struct delayed_work stat_work; enum fe_status fe_status; u64 post_bit_error_prev; /* for old DVBv3 read_ber() calculation */ u64 post_bit_error; diff --git a/drivers/media/dvb-frontends/rtl2832.c b/drivers/media/dvb-frontends/rtl2832.c index bfb6beedd40b..0ced01f1012e 100644 --- a/drivers/media/dvb-frontends/rtl2832.c +++ b/drivers/media/dvb-frontends/rtl2832.c @@ -947,6 +947,8 @@ static int rtl2832_slave_ts_ctrl(struct i2c_client *client, bool enable) goto err; } + dev->slave_ts = enable; + return 0; err: dev_dbg(&client->dev, "failed=%d\n", ret); @@ -960,7 +962,7 @@ static int rtl2832_pid_filter_ctrl(struct dvb_frontend *fe, int onoff) int ret; u8 u8tmp; - dev_dbg(&client->dev, "onoff=%d\n", onoff); + dev_dbg(&client->dev, "onoff=%d, slave_ts=%d\n", onoff, dev->slave_ts); /* enable / disable PID filter */ if (onoff) @@ -968,7 +970,10 @@ static int rtl2832_pid_filter_ctrl(struct dvb_frontend *fe, int onoff) else u8tmp = 0x00; - ret = regmap_update_bits(dev->regmap, 0x061, 0xc0, u8tmp); + if (dev->slave_ts) + ret = regmap_update_bits(dev->regmap, 0x021, 0xc0, u8tmp); + else + ret = regmap_update_bits(dev->regmap, 0x061, 0xc0, u8tmp); if (ret) goto err; @@ -986,8 +991,8 @@ static int rtl2832_pid_filter(struct dvb_frontend *fe, u8 index, u16 pid, int ret; u8 buf[4]; - dev_dbg(&client->dev, "index=%d pid=%04x onoff=%d\n", - index, pid, onoff); + dev_dbg(&client->dev, "index=%d pid=%04x onoff=%d slave_ts=%d\n", + index, pid, onoff, dev->slave_ts); /* skip invalid PIDs (0x2000) */ if (pid > 0x1fff || index > 32) @@ -1003,14 +1008,22 @@ static int rtl2832_pid_filter(struct dvb_frontend *fe, u8 index, u16 pid, buf[1] = (dev->filters >> 8) & 0xff; buf[2] = (dev->filters >> 16) & 0xff; buf[3] = (dev->filters >> 24) & 0xff; - ret = regmap_bulk_write(dev->regmap, 0x062, buf, 4); + + if (dev->slave_ts) + ret = regmap_bulk_write(dev->regmap, 0x022, buf, 4); + else + ret = regmap_bulk_write(dev->regmap, 0x062, buf, 4); if (ret) goto err; /* add PID */ buf[0] = (pid >> 8) & 0xff; buf[1] = (pid >> 0) & 0xff; - ret = regmap_bulk_write(dev->regmap, 0x066 + 2 * index, buf, 2); + + if (dev->slave_ts) + ret = regmap_bulk_write(dev->regmap, 0x026 + 2 * index, buf, 2); + else + ret = regmap_bulk_write(dev->regmap, 0x066 + 2 * index, buf, 2); if (ret) goto err; @@ -1135,6 +1148,7 @@ MODULE_DEVICE_TABLE(i2c, rtl2832_id_table); static struct i2c_driver rtl2832_driver = { .driver = { .name = "rtl2832", + .suppress_bind_attrs = true, }, .probe = rtl2832_probe, .remove = rtl2832_remove, diff --git a/drivers/media/dvb-frontends/rtl2832_priv.h b/drivers/media/dvb-frontends/rtl2832_priv.h index c1a8a69e9015..9a6d01a9c690 100644 --- a/drivers/media/dvb-frontends/rtl2832_priv.h +++ b/drivers/media/dvb-frontends/rtl2832_priv.h @@ -44,6 +44,7 @@ struct rtl2832_dev { bool sleeping; struct delayed_work i2c_gate_work; unsigned long filters; /* PID filter */ + bool slave_ts; }; struct rtl2832_reg_entry { diff --git a/drivers/media/dvb-frontends/rtl2832_sdr.c b/drivers/media/dvb-frontends/rtl2832_sdr.c index 47a480a7d46c..6e22af36b637 100644 --- a/drivers/media/dvb-frontends/rtl2832_sdr.c +++ b/drivers/media/dvb-frontends/rtl2832_sdr.c @@ -452,7 +452,7 @@ static int rtl2832_sdr_querycap(struct file *file, void *fh, /* Videobuf2 operations */ static int rtl2832_sdr_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, - unsigned int 
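/*
 * Aside (illustration only): the rtl2832 PID filter below keeps one
 * 32-entry enable bitmap (dev->filters) and mirrors it into a 4-byte
 * register block; with a slave demod attached, the whole block simply
 * lives 0x40 lower (0x021/0x022/0x026 instead of 0x061/0x062/0x066).
 * The bitmap serialisation as a hypothetical helper:
 */
static void example_filters_to_buf(unsigned long filters, u8 buf[4])
{
	buf[0] = filters & 0xff;
	buf[1] = (filters >> 8) & 0xff;
	buf[2] = (filters >> 16) & 0xff;
	buf[3] = (filters >> 24) & 0xff;
}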
*nplanes, unsigned int sizes[], void *alloc_ctxs[]) + unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct rtl2832_sdr_dev *dev = vb2_get_drv_priv(vq); struct platform_device *pdev = dev->pdev; diff --git a/drivers/media/dvb-frontends/si2168.c b/drivers/media/dvb-frontends/si2168.c index 108a069fa1ae..20b4a659e2e4 100644 --- a/drivers/media/dvb-frontends/si2168.c +++ b/drivers/media/dvb-frontends/si2168.c @@ -357,9 +357,7 @@ static int si2168_init(struct dvb_frontend *fe) struct si2168_dev *dev = i2c_get_clientdata(client); int ret, len, remaining; const struct firmware *fw; - const char *fw_name; struct si2168_cmd cmd; - unsigned int chip_id; dev_dbg(&client->dev, "\n"); @@ -371,7 +369,7 @@ static int si2168_init(struct dvb_frontend *fe) if (ret) goto err; - if (dev->fw_loaded) { + if (dev->warm) { /* resume */ memcpy(cmd.args, "\xc0\x06\x08\x0f\x00\x20\x21\x01", 8); cmd.wlen = 8; @@ -398,49 +396,14 @@ static int si2168_init(struct dvb_frontend *fe) if (ret) goto err; - /* query chip revision */ - memcpy(cmd.args, "\x02", 1); - cmd.wlen = 1; - cmd.rlen = 13; - ret = si2168_cmd_execute(client, &cmd); - if (ret) - goto err; - - chip_id = cmd.args[1] << 24 | cmd.args[2] << 16 | cmd.args[3] << 8 | - cmd.args[4] << 0; - - #define SI2168_A20 ('A' << 24 | 68 << 16 | '2' << 8 | '0' << 0) - #define SI2168_A30 ('A' << 24 | 68 << 16 | '3' << 8 | '0' << 0) - #define SI2168_B40 ('B' << 24 | 68 << 16 | '4' << 8 | '0' << 0) - - switch (chip_id) { - case SI2168_A20: - fw_name = SI2168_A20_FIRMWARE; - break; - case SI2168_A30: - fw_name = SI2168_A30_FIRMWARE; - break; - case SI2168_B40: - fw_name = SI2168_B40_FIRMWARE; - break; - default: - dev_err(&client->dev, "unknown chip version Si21%d-%c%c%c\n", - cmd.args[2], cmd.args[1], - cmd.args[3], cmd.args[4]); - ret = -EINVAL; - goto err; - } - - dev_info(&client->dev, "found a 'Silicon Labs Si21%d-%c%c%c'\n", - cmd.args[2], cmd.args[1], cmd.args[3], cmd.args[4]); - /* request the firmware, this will block and timeout */ - ret = request_firmware(&fw, fw_name, &client->dev); + ret = request_firmware(&fw, dev->firmware_name, &client->dev); if (ret) { /* fallback mechanism to handle old name for Si2168 B40 fw */ - if (chip_id == SI2168_B40) { - fw_name = SI2168_B40_FIRMWARE_FALLBACK; - ret = request_firmware(&fw, fw_name, &client->dev); + if (dev->chip_id == SI2168_CHIP_ID_B40) { + dev->firmware_name = SI2168_B40_FIRMWARE_FALLBACK; + ret = request_firmware(&fw, dev->firmware_name, + &client->dev); } if (ret == 0) { @@ -450,13 +413,13 @@ static int si2168_init(struct dvb_frontend *fe) } else { dev_err(&client->dev, "firmware file '%s' not found\n", - fw_name); + dev->firmware_name); goto err_release_firmware; } } dev_info(&client->dev, "downloading firmware from file '%s'\n", - fw_name); + dev->firmware_name); if ((fw->size % 17 == 0) && (fw->data[0] > 5)) { /* firmware is in the new format */ @@ -511,8 +474,11 @@ static int si2168_init(struct dvb_frontend *fe) if (ret) goto err; - dev_info(&client->dev, "firmware version: %c.%c.%d\n", - cmd.args[6], cmd.args[7], cmd.args[8]); + dev->version = (cmd.args[9] + '@') << 24 | (cmd.args[6] - '0') << 16 | + (cmd.args[7] - '0') << 8 | (cmd.args[8]) << 0; + dev_info(&client->dev, "firmware version: %c %d.%d.%d\n", + dev->version >> 24 & 0xff, dev->version >> 16 & 0xff, + dev->version >> 8 & 0xff, dev->version >> 0 & 0xff); /* set ts mode */ memcpy(cmd.args, "\x14\x00\x01\x10\x10\x00", 6); @@ -525,7 +491,7 @@ static int si2168_init(struct dvb_frontend *fe) if (ret) goto err; - dev->fw_loaded 
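/*
 * Aside (illustration only): si2168 packs the ASCII revision bytes into
 * a single u32 (as above), so both equality tests against the
 * SI2168_CHIP_ID_* constants and ordered checks such as "firmware
 * B 4.0-11 or later" become plain integer comparisons.  The packing as
 * an invented helper:
 */
static u32 example_pack_version(u8 cmos, u8 major, u8 minor, u8 build)
{
	return cmos << 24 | major << 16 | minor << 8 | build;
}

/* e.g.: if (dev->version > example_pack_version('B', 4, 0, 11)) ... */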
= true; + dev->warm = true; warm: dev->active = true; @@ -549,6 +515,10 @@ static int si2168_sleep(struct dvb_frontend *fe) dev->active = false; + /* Firmware B 4.0-11 or later loses warm state during sleep */ + if (dev->version > ('B' << 24 | 4 << 16 | 0 << 8 | 11 << 0)) + dev->warm = false; + memcpy(cmd.args, "\x13", 1); cmd.wlen = 1; cmd.rlen = 0; @@ -653,6 +623,7 @@ static int si2168_probe(struct i2c_client *client, struct si2168_config *config = client->dev.platform_data; struct si2168_dev *dev; int ret; + struct si2168_cmd cmd; dev_dbg(&client->dev, "\n"); @@ -663,8 +634,56 @@ static int si2168_probe(struct i2c_client *client, goto err; } + i2c_set_clientdata(client, dev); mutex_init(&dev->i2c_mutex); + /* Initialize */ + memcpy(cmd.args, "\xc0\x12\x00\x0c\x00\x0d\x16\x00\x00\x00\x00\x00\x00", 13); + cmd.wlen = 13; + cmd.rlen = 0; + ret = si2168_cmd_execute(client, &cmd); + if (ret) + goto err_kfree; + + /* Power up */ + memcpy(cmd.args, "\xc0\x06\x01\x0f\x00\x20\x20\x01", 8); + cmd.wlen = 8; + cmd.rlen = 1; + ret = si2168_cmd_execute(client, &cmd); + if (ret) + goto err_kfree; + + /* Query chip revision */ + memcpy(cmd.args, "\x02", 1); + cmd.wlen = 1; + cmd.rlen = 13; + ret = si2168_cmd_execute(client, &cmd); + if (ret) + goto err_kfree; + + dev->chip_id = cmd.args[1] << 24 | cmd.args[2] << 16 | + cmd.args[3] << 8 | cmd.args[4] << 0; + + switch (dev->chip_id) { + case SI2168_CHIP_ID_A20: + dev->firmware_name = SI2168_A20_FIRMWARE; + break; + case SI2168_CHIP_ID_A30: + dev->firmware_name = SI2168_A30_FIRMWARE; + break; + case SI2168_CHIP_ID_B40: + dev->firmware_name = SI2168_B40_FIRMWARE; + break; + default: + dev_dbg(&client->dev, "unknown chip version Si21%d-%c%c%c\n", + cmd.args[2], cmd.args[1], cmd.args[3], cmd.args[4]); + ret = -ENODEV; + goto err_kfree; + } + + dev->version = (cmd.args[1]) << 24 | (cmd.args[3] - '0') << 16 | + (cmd.args[4] - '0') << 8 | (cmd.args[5]) << 0; + /* create mux i2c adapter for tuner */ dev->muxc = i2c_mux_alloc(client->adapter, &client->dev, 1, 0, I2C_MUX_LOCKED, @@ -686,11 +705,14 @@ static int si2168_probe(struct i2c_client *client, dev->ts_mode = config->ts_mode; dev->ts_clock_inv = config->ts_clock_inv; dev->ts_clock_gapped = config->ts_clock_gapped; - dev->fw_loaded = false; - i2c_set_clientdata(client, dev); + dev_info(&client->dev, "Silicon Labs Si2168-%c%d%d successfully identified\n", + dev->version >> 24 & 0xff, dev->version >> 16 & 0xff, + dev->version >> 8 & 0xff); + dev_info(&client->dev, "firmware version: %c %d.%d.%d\n", + dev->version >> 24 & 0xff, dev->version >> 16 & 0xff, + dev->version >> 8 & 0xff, dev->version >> 0 & 0xff); - dev_info(&client->dev, "Silicon Labs Si2168 successfully attached\n"); return 0; err_kfree: kfree(dev); @@ -723,7 +745,8 @@ MODULE_DEVICE_TABLE(i2c, si2168_id_table); static struct i2c_driver si2168_driver = { .driver = { - .name = "si2168", + .name = "si2168", + .suppress_bind_attrs = true, }, .probe = si2168_probe, .remove = si2168_remove, diff --git a/drivers/media/dvb-frontends/si2168_priv.h b/drivers/media/dvb-frontends/si2168_priv.h index 8a1f36d2014d..7843ccb448a0 100644 --- a/drivers/media/dvb-frontends/si2168_priv.h +++ b/drivers/media/dvb-frontends/si2168_priv.h @@ -34,8 +34,14 @@ struct si2168_dev { struct dvb_frontend fe; enum fe_delivery_system delivery_system; enum fe_status fe_status; + #define SI2168_CHIP_ID_A20 ('A' << 24 | 68 << 16 | '2' << 8 | '0' << 0) + #define SI2168_CHIP_ID_A30 ('A' << 24 | 68 << 16 | '3' << 8 | '0' << 0) + #define SI2168_CHIP_ID_B40 ('B' << 24 | 68 << 16 | '4' << 8 | 
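/* packs the revision bytes, e.g. 'B' (0x42), 68 (0x44), '4' (0x34), '0' (0x30) -> 0x42443430 */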
'0' << 0) + unsigned int chip_id; + unsigned int version; + const char *firmware_name; bool active; - bool fw_loaded; + bool warm; u8 ts_mode; bool ts_clock_inv; bool ts_clock_gapped; diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index 993dc50c12db..ce9006e10a30 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -209,6 +209,7 @@ config VIDEO_ADV7604 depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API depends on GPIOLIB || COMPILE_TEST select HDMI + select MEDIA_CEC_EDID ---help--- Support for the Analog Devices ADV7604 video decoder. @@ -218,10 +219,18 @@ config VIDEO_ADV7604 To compile this driver as a module, choose M here: the module will be called adv7604. +config VIDEO_ADV7604_CEC + bool "Enable Analog Devices ADV7604 CEC support" + depends on VIDEO_ADV7604 && MEDIA_CEC + ---help--- + When selected the adv7604 will support the optional + HDMI CEC feature. + config VIDEO_ADV7842 tristate "Analog Devices ADV7842 decoder" depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API select HDMI + select MEDIA_CEC_EDID ---help--- Support for the Analog Devices ADV7842 video decoder. @@ -231,6 +240,13 @@ config VIDEO_ADV7842 To compile this driver as a module, choose M here: the module will be called adv7842. +config VIDEO_ADV7842_CEC + bool "Enable Analog Devices ADV7842 CEC support" + depends on VIDEO_ADV7842 && MEDIA_CEC + ---help--- + When selected the adv7842 will support the optional + HDMI CEC feature. + config VIDEO_BT819 tristate "BT819A VideoStream decoder" depends on VIDEO_V4L2 && I2C @@ -447,6 +463,7 @@ config VIDEO_ADV7511 tristate "Analog Devices ADV7511 encoder" depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API select HDMI + select MEDIA_CEC_EDID ---help--- Support for the Analog Devices ADV7511 video encoder. @@ -455,6 +472,13 @@ config VIDEO_ADV7511 To compile this driver as a module, choose M here: the module will be called adv7511. +config VIDEO_ADV7511_CEC + bool "Enable Analog Devices ADV7511 CEC support" + depends on VIDEO_ADV7511 && MEDIA_CEC + ---help--- + When selected the adv7511 will support the optional + HDMI CEC feature. + config VIDEO_AD9389B tristate "Analog Devices AD9389B encoder" depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API diff --git a/drivers/media/i2c/adv7511.c b/drivers/media/i2c/adv7511.c index 39271c35da48..6d7cad54a65d 100644 --- a/drivers/media/i2c/adv7511.c +++ b/drivers/media/i2c/adv7511.c @@ -33,6 +33,7 @@ #include #include #include +#include static int debug; module_param(debug, int, 0644); @@ -59,6 +60,8 @@ MODULE_LICENSE("GPL v2"); #define ADV7511_MIN_PIXELCLOCK 20000000 #define ADV7511_MAX_PIXELCLOCK 225000000 +#define ADV7511_MAX_ADDRS (3) + /* ********************************************************************** * @@ -90,12 +93,20 @@ struct adv7511_state { struct v4l2_ctrl_handler hdl; int chip_revision; u8 i2c_edid_addr; - u8 i2c_cec_addr; u8 i2c_pktmem_addr; + u8 i2c_cec_addr; + + struct i2c_client *i2c_cec; + struct cec_adapter *cec_adap; + u8 cec_addr[ADV7511_MAX_ADDRS]; + u8 cec_valid_addrs; + bool cec_enabled_adap; + /* Is the adv7511 powered on? */ bool power_on; /* Did we receive hotplug and rx-sense signals? 
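A sink is only treated as present once both have been seen.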
*/ bool have_monitor; + bool enabled_irq; /* timings from s_dv_timings */ struct v4l2_dv_timings dv_timings; u32 fmt_code; @@ -227,7 +238,7 @@ static int adv_smbus_read_i2c_block_data(struct i2c_client *client, return ret; } -static inline void adv7511_edid_rd(struct v4l2_subdev *sd, u16 len, u8 *buf) +static void adv7511_edid_rd(struct v4l2_subdev *sd, uint16_t len, uint8_t *buf) { struct adv7511_state *state = get_adv7511_state(sd); int i; @@ -242,6 +253,34 @@ static inline void adv7511_edid_rd(struct v4l2_subdev *sd, u16 len, u8 *buf) v4l2_err(sd, "%s: i2c read error\n", __func__); } +static inline int adv7511_cec_read(struct v4l2_subdev *sd, u8 reg) +{ + struct adv7511_state *state = get_adv7511_state(sd); + + return i2c_smbus_read_byte_data(state->i2c_cec, reg); +} + +static int adv7511_cec_write(struct v4l2_subdev *sd, u8 reg, u8 val) +{ + struct adv7511_state *state = get_adv7511_state(sd); + int ret; + int i; + + for (i = 0; i < 3; i++) { + ret = i2c_smbus_write_byte_data(state->i2c_cec, reg, val); + if (ret == 0) + return 0; + } + v4l2_err(sd, "%s: I2C Write Problem\n", __func__); + return ret; +} + +static inline int adv7511_cec_write_and_or(struct v4l2_subdev *sd, u8 reg, u8 mask, + u8 val) +{ + return adv7511_cec_write(sd, reg, (adv7511_cec_read(sd, reg) & mask) | val); +} + static int adv7511_pktmem_rd(struct v4l2_subdev *sd, u8 reg) { struct adv7511_state *state = get_adv7511_state(sd); @@ -343,28 +382,20 @@ static void adv7511_csc_rgb_full2limit(struct v4l2_subdev *sd, bool enable) } } -static void adv7511_set_IT_content_AVI_InfoFrame(struct v4l2_subdev *sd) +static void adv7511_set_rgb_quantization_mode(struct v4l2_subdev *sd, struct v4l2_ctrl *ctrl) { struct adv7511_state *state = get_adv7511_state(sd); - if (state->dv_timings.bt.flags & V4L2_DV_FL_IS_CE_VIDEO) { - /* CE format, not IT */ - adv7511_wr_and_or(sd, 0x57, 0x7f, 0x00); - } else { - /* IT format */ - adv7511_wr_and_or(sd, 0x57, 0x7f, 0x80); + + /* Only makes sense for RGB formats */ + if (state->fmt_code != MEDIA_BUS_FMT_RGB888_1X24) { + /* so just keep quantization */ + adv7511_csc_rgb_full2limit(sd, false); + return; } -} -static int adv7511_set_rgb_quantization_mode(struct v4l2_subdev *sd, struct v4l2_ctrl *ctrl) -{ switch (ctrl->val) { - default: - return -EINVAL; - break; - case V4L2_DV_RGB_RANGE_AUTO: { + case V4L2_DV_RGB_RANGE_AUTO: /* automatic */ - struct adv7511_state *state = get_adv7511_state(sd); - if (state->dv_timings.bt.flags & V4L2_DV_FL_IS_CE_VIDEO) { /* CE format, RGB limited range (16-235) */ adv7511_csc_rgb_full2limit(sd, true); @@ -372,7 +403,6 @@ static int adv7511_set_rgb_quantization_mode(struct v4l2_subdev *sd, struct v4l2 /* not CE format, RGB full range (0-255) */ adv7511_csc_rgb_full2limit(sd, false); } - } break; case V4L2_DV_RGB_RANGE_LIMITED: /* RGB limited range (16-235) */ @@ -383,7 +413,6 @@ static int adv7511_set_rgb_quantization_mode(struct v4l2_subdev *sd, struct v4l2 adv7511_csc_rgb_full2limit(sd, false); break; } - return 0; } /* ------------------------------ CTRL OPS ------------------------------ */ @@ -400,8 +429,10 @@ static int adv7511_s_ctrl(struct v4l2_ctrl *ctrl) adv7511_wr_and_or(sd, 0xaf, 0xfd, ctrl->val == V4L2_DV_TX_MODE_HDMI ? 
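/* bit 1 of register 0xaf selects HDMI (1) or DVI-D (0) operation */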
0x02 : 0x00); return 0; } - if (state->rgb_quantization_range_ctrl == ctrl) - return adv7511_set_rgb_quantization_mode(sd, ctrl); + if (state->rgb_quantization_range_ctrl == ctrl) { + adv7511_set_rgb_quantization_mode(sd, ctrl); + return 0; + } if (state->content_type_ctrl == ctrl) { u8 itc, cn; @@ -425,16 +456,28 @@ static const struct v4l2_ctrl_ops adv7511_ctrl_ops = { #ifdef CONFIG_VIDEO_ADV_DEBUG static void adv7511_inv_register(struct v4l2_subdev *sd) { + struct adv7511_state *state = get_adv7511_state(sd); + v4l2_info(sd, "0x000-0x0ff: Main Map\n"); + if (state->i2c_cec) + v4l2_info(sd, "0x100-0x1ff: CEC Map\n"); } static int adv7511_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { + struct adv7511_state *state = get_adv7511_state(sd); + reg->size = 1; switch (reg->reg >> 8) { case 0: reg->val = adv7511_rd(sd, reg->reg & 0xff); break; + case 1: + if (state->i2c_cec) { + reg->val = adv7511_cec_read(sd, reg->reg & 0xff); + break; + } + /* fall through */ default: v4l2_info(sd, "Register %03llx not supported\n", reg->reg); adv7511_inv_register(sd); @@ -445,10 +488,18 @@ static int adv7511_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register * static int adv7511_s_register(struct v4l2_subdev *sd, const struct v4l2_dbg_register *reg) { + struct adv7511_state *state = get_adv7511_state(sd); + switch (reg->reg >> 8) { case 0: adv7511_wr(sd, reg->reg & 0xff, reg->val & 0xff); break; + case 1: + if (state->i2c_cec) { + adv7511_cec_write(sd, reg->reg & 0xff, reg->val & 0xff); + break; + } + /* fall through */ default: v4l2_info(sd, "Register %03llx not supported\n", reg->reg); adv7511_inv_register(sd); @@ -536,6 +587,7 @@ static int adv7511_log_status(struct v4l2_subdev *sd) { struct adv7511_state *state = get_adv7511_state(sd); struct adv7511_state_edid *edid = &state->edid; + int i; static const char * const states[] = { "in reset", @@ -605,7 +657,23 @@ static int adv7511_log_status(struct v4l2_subdev *sd) else v4l2_info(sd, "no timings set\n"); v4l2_info(sd, "i2c edid addr: 0x%x\n", state->i2c_edid_addr); + + if (state->i2c_cec == NULL) + return 0; + v4l2_info(sd, "i2c cec addr: 0x%x\n", state->i2c_cec_addr); + + v4l2_info(sd, "CEC: %s\n", state->cec_enabled_adap ? 
+ "enabled" : "disabled"); + if (state->cec_enabled_adap) { + for (i = 0; i < ADV7511_MAX_ADDRS; i++) { + bool is_valid = state->cec_valid_addrs & (1 << i); + + if (is_valid) + v4l2_info(sd, "CEC Logical Address: 0x%x\n", + state->cec_addr[i]); + } + } v4l2_info(sd, "i2c pktmem addr: 0x%x\n", state->i2c_pktmem_addr); return 0; } @@ -663,15 +731,197 @@ static int adv7511_s_power(struct v4l2_subdev *sd, int on) return true; } +#if IS_ENABLED(CONFIG_VIDEO_ADV7511_CEC) +static int adv7511_cec_adap_enable(struct cec_adapter *adap, bool enable) +{ + struct adv7511_state *state = adap->priv; + struct v4l2_subdev *sd = &state->sd; + + if (state->i2c_cec == NULL) + return -EIO; + + if (!state->cec_enabled_adap && enable) { + /* power up cec section */ + adv7511_cec_write_and_or(sd, 0x4e, 0xfc, 0x01); + /* legacy mode and clear all rx buffers */ + adv7511_cec_write(sd, 0x4a, 0x07); + adv7511_cec_write(sd, 0x4a, 0); + adv7511_cec_write_and_or(sd, 0x11, 0xfe, 0); /* initially disable tx */ + /* enabled irqs: */ + /* tx: ready */ + /* tx: arbitration lost */ + /* tx: retry timeout */ + /* rx: ready 1 */ + if (state->enabled_irq) + adv7511_wr_and_or(sd, 0x95, 0xc0, 0x39); + } else if (state->cec_enabled_adap && !enable) { + if (state->enabled_irq) + adv7511_wr_and_or(sd, 0x95, 0xc0, 0x00); + /* disable address mask 1-3 */ + adv7511_cec_write_and_or(sd, 0x4b, 0x8f, 0x00); + /* power down cec section */ + adv7511_cec_write_and_or(sd, 0x4e, 0xfc, 0x00); + state->cec_valid_addrs = 0; + } + state->cec_enabled_adap = enable; + return 0; +} + +static int adv7511_cec_adap_log_addr(struct cec_adapter *adap, u8 addr) +{ + struct adv7511_state *state = adap->priv; + struct v4l2_subdev *sd = &state->sd; + unsigned int i, free_idx = ADV7511_MAX_ADDRS; + + if (!state->cec_enabled_adap) + return addr == CEC_LOG_ADDR_INVALID ? 0 : -EIO; + + if (addr == CEC_LOG_ADDR_INVALID) { + adv7511_cec_write_and_or(sd, 0x4b, 0x8f, 0); + state->cec_valid_addrs = 0; + return 0; + } + + for (i = 0; i < ADV7511_MAX_ADDRS; i++) { + bool is_valid = state->cec_valid_addrs & (1 << i); + + if (free_idx == ADV7511_MAX_ADDRS && !is_valid) + free_idx = i; + if (is_valid && state->cec_addr[i] == addr) + return 0; + } + if (i == ADV7511_MAX_ADDRS) { + i = free_idx; + if (i == ADV7511_MAX_ADDRS) + return -ENXIO; + } + state->cec_addr[i] = addr; + state->cec_valid_addrs |= 1 << i; + + switch (i) { + case 0: + /* enable address mask 0 */ + adv7511_cec_write_and_or(sd, 0x4b, 0xef, 0x10); + /* set address for mask 0 */ + adv7511_cec_write_and_or(sd, 0x4c, 0xf0, addr); + break; + case 1: + /* enable address mask 1 */ + adv7511_cec_write_and_or(sd, 0x4b, 0xdf, 0x20); + /* set address for mask 1 */ + adv7511_cec_write_and_or(sd, 0x4c, 0x0f, addr << 4); + break; + case 2: + /* enable address mask 2 */ + adv7511_cec_write_and_or(sd, 0x4b, 0xbf, 0x40); + /* set address for mask 1 */ + adv7511_cec_write_and_or(sd, 0x4d, 0xf0, addr); + break; + } + return 0; +} + +static int adv7511_cec_adap_transmit(struct cec_adapter *adap, u8 attempts, + u32 signal_free_time, struct cec_msg *msg) +{ + struct adv7511_state *state = adap->priv; + struct v4l2_subdev *sd = &state->sd; + u8 len = msg->len; + unsigned int i; + + v4l2_dbg(1, debug, sd, "%s: len %d\n", __func__, len); + + if (len > 16) { + v4l2_err(sd, "%s: len exceeded 16 (%d)\n", __func__, len); + return -EINVAL; + } + + /* + * The number of retries is the number of attempts - 1, but retry + * at least once. It's not clear if a value of 0 is allowed, so + * let's do at least one retry. 
+ */ + adv7511_cec_write_and_or(sd, 0x12, ~0x70, max(1, attempts - 1) << 4); + + /* blocking, clear cec tx irq status */ + adv7511_wr_and_or(sd, 0x97, 0xc7, 0x38); + + /* write data */ + for (i = 0; i < len; i++) + adv7511_cec_write(sd, i, msg->msg[i]); + + /* set length (data + header) */ + adv7511_cec_write(sd, 0x10, len); + /* start transmit, enable tx */ + adv7511_cec_write(sd, 0x11, 0x01); + return 0; +} + +static void adv_cec_tx_raw_status(struct v4l2_subdev *sd, u8 tx_raw_status) +{ + struct adv7511_state *state = get_adv7511_state(sd); + + if ((adv7511_cec_read(sd, 0x11) & 0x01) == 0) { + v4l2_dbg(1, debug, sd, "%s: tx raw: tx disabled\n", __func__); + return; + } + + if (tx_raw_status & 0x10) { + v4l2_dbg(1, debug, sd, + "%s: tx raw: arbitration lost\n", __func__); + cec_transmit_done(state->cec_adap, CEC_TX_STATUS_ARB_LOST, + 1, 0, 0, 0); + return; + } + if (tx_raw_status & 0x08) { + u8 status; + u8 nack_cnt; + u8 low_drive_cnt; + + v4l2_dbg(1, debug, sd, "%s: tx raw: retry failed\n", __func__); + /* + * We set this status bit since this hardware performs + * retransmissions. + */ + status = CEC_TX_STATUS_MAX_RETRIES; + nack_cnt = adv7511_cec_read(sd, 0x14) & 0xf; + if (nack_cnt) + status |= CEC_TX_STATUS_NACK; + low_drive_cnt = adv7511_cec_read(sd, 0x14) >> 4; + if (low_drive_cnt) + status |= CEC_TX_STATUS_LOW_DRIVE; + cec_transmit_done(state->cec_adap, status, + 0, nack_cnt, low_drive_cnt, 0); + return; + } + if (tx_raw_status & 0x20) { + v4l2_dbg(1, debug, sd, "%s: tx raw: ready ok\n", __func__); + cec_transmit_done(state->cec_adap, CEC_TX_STATUS_OK, 0, 0, 0, 0); + return; + } +} + +static const struct cec_adap_ops adv7511_cec_adap_ops = { + .adap_enable = adv7511_cec_adap_enable, + .adap_log_addr = adv7511_cec_adap_log_addr, + .adap_transmit = adv7511_cec_adap_transmit, +}; +#endif + /* Enable interrupts */ static void adv7511_set_isr(struct v4l2_subdev *sd, bool enable) { + struct adv7511_state *state = get_adv7511_state(sd); u8 irqs = MASK_ADV7511_HPD_INT | MASK_ADV7511_MSEN_INT; u8 irqs_rd; int retries = 100; v4l2_dbg(2, debug, sd, "%s: %s\n", __func__, enable ? "enable" : "disable"); + if (state->enabled_irq == enable) + return; + state->enabled_irq = enable; + /* The datasheet says that the EDID ready interrupt should be disabled if there is no hotplug. */ if (!enable) @@ -679,6 +929,9 @@ static void adv7511_set_isr(struct v4l2_subdev *sd, bool enable) else if (adv7511_have_hotplug(sd)) irqs |= MASK_ADV7511_EDID_RDY_INT; + adv7511_wr_and_or(sd, 0x95, 0xc0, + (state->cec_enabled_adap && enable) ? 0x39 : 0x00); + /* * This i2c write can fail (approx. 1 in 1000 writes). 
But it * is essential that this register is correct, so retry it @@ -701,20 +954,53 @@ static void adv7511_set_isr(struct v4l2_subdev *sd, bool enable) static int adv7511_isr(struct v4l2_subdev *sd, u32 status, bool *handled) { u8 irq_status; + u8 cec_irq; /* disable interrupts to prevent a race condition */ adv7511_set_isr(sd, false); irq_status = adv7511_rd(sd, 0x96); + cec_irq = adv7511_rd(sd, 0x97); /* clear detected interrupts */ adv7511_wr(sd, 0x96, irq_status); + adv7511_wr(sd, 0x97, cec_irq); - v4l2_dbg(1, debug, sd, "%s: irq 0x%x\n", __func__, irq_status); + v4l2_dbg(1, debug, sd, "%s: irq 0x%x, cec-irq 0x%x\n", __func__, + irq_status, cec_irq); if (irq_status & (MASK_ADV7511_HPD_INT | MASK_ADV7511_MSEN_INT)) adv7511_check_monitor_present_status(sd); if (irq_status & MASK_ADV7511_EDID_RDY_INT) adv7511_check_edid_status(sd); +#if IS_ENABLED(CONFIG_VIDEO_ADV7511_CEC) + if (cec_irq & 0x38) + adv_cec_tx_raw_status(sd, cec_irq); + + if (cec_irq & 1) { + struct adv7511_state *state = get_adv7511_state(sd); + struct cec_msg msg; + + msg.len = adv7511_cec_read(sd, 0x25) & 0x1f; + + v4l2_dbg(1, debug, sd, "%s: cec msg len %d\n", __func__, + msg.len); + + if (msg.len > 16) + msg.len = 16; + + if (msg.len) { + u8 i; + + for (i = 0; i < msg.len; i++) + msg.msg[i] = adv7511_cec_read(sd, i + 0x15); + + adv7511_cec_write(sd, 0x4a, 1); /* toggle to re-enable rx 1 */ + adv7511_cec_write(sd, 0x4a, 0); + cec_received_msg(state->cec_adap, &msg); + } + } +#endif + /* enable interrupts */ adv7511_set_isr(sd, true); @@ -771,12 +1057,14 @@ static int adv7511_s_dv_timings(struct v4l2_subdev *sd, /* save timings */ state->dv_timings = *timings; + /* set h/vsync polarities */ + adv7511_wr_and_or(sd, 0x17, 0x9f, + ((timings->bt.polarities & V4L2_DV_VSYNC_POS_POL) ? 0 : 0x40) | + ((timings->bt.polarities & V4L2_DV_HSYNC_POS_POL) ? 0 : 0x20)); + /* update quantization range based on new dv_timings */ adv7511_set_rgb_quantization_mode(sd, state->rgb_quantization_range_ctrl); - /* update AVI infoframe */ - adv7511_set_IT_content_AVI_InfoFrame(sd); - return 0; } @@ -956,8 +1244,6 @@ static int adv7511_enum_mbus_code(struct v4l2_subdev *sd, static void adv7511_fill_format(struct adv7511_state *state, struct v4l2_mbus_framefmt *format) { - memset(format, 0, sizeof(*format)); - format->width = state->dv_timings.bt.width; format->height = state->dv_timings.bt.height; format->field = V4L2_FIELD_NONE; @@ -972,6 +1258,7 @@ static int adv7511_get_fmt(struct v4l2_subdev *sd, if (format->pad != 0) return -EINVAL; + memset(&format->format, 0, sizeof(format->format)); adv7511_fill_format(state, &format->format); if (format->which == V4L2_SUBDEV_FORMAT_TRY) { @@ -1132,6 +1419,7 @@ static int adv7511_set_fmt(struct v4l2_subdev *sd, adv7511_wr_and_or(sd, 0x57, 0x83, (ec << 4) | (q << 2) | (itc << 7)); adv7511_wr_and_or(sd, 0x59, 0x0f, (yq << 6) | (cn << 4)); adv7511_wr_and_or(sd, 0x4a, 0xff, 1); + adv7511_set_rgb_quantization_mode(sd, state->rgb_quantization_range_ctrl); return 0; } @@ -1183,6 +1471,8 @@ static void adv7511_notify_no_edid(struct v4l2_subdev *sd) /* We failed to read the EDID, so send an event for this. 
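The CEC physical address is invalidated as well, since it is derived from the EDID.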
*/ ed.present = false; ed.segment = adv7511_rd(sd, 0xc4); + ed.phys_addr = CEC_PHYS_ADDR_INVALID; + cec_s_phys_addr(state->cec_adap, ed.phys_addr, false); v4l2_subdev_notify(sd, ADV7511_EDID_DETECT, (void *)&ed); v4l2_ctrl_s_ctrl(state->have_edid0_ctrl, 0x0); } @@ -1406,13 +1696,16 @@ static bool adv7511_check_edid_status(struct v4l2_subdev *sd) v4l2_dbg(1, debug, sd, "%s: edid complete with %d segment(s)\n", __func__, state->edid.segments); state->edid.complete = true; - + ed.phys_addr = cec_get_edid_phys_addr(state->edid.data, + state->edid.segments * 256, + NULL); /* report when we have all segments but report only for segment 0 */ ed.present = true; ed.segment = 0; state->edid_detect_counter++; + cec_s_phys_addr(state->cec_adap, ed.phys_addr, false); v4l2_subdev_notify(sd, ADV7511_EDID_DETECT, (void *)&ed); return ed.present; } @@ -1420,17 +1713,43 @@ static bool adv7511_check_edid_status(struct v4l2_subdev *sd) return false; } +static int adv7511_registered(struct v4l2_subdev *sd) +{ + struct adv7511_state *state = get_adv7511_state(sd); + int err; + + err = cec_register_adapter(state->cec_adap); + if (err) + cec_delete_adapter(state->cec_adap); + return err; +} + +static void adv7511_unregistered(struct v4l2_subdev *sd) +{ + struct adv7511_state *state = get_adv7511_state(sd); + + cec_unregister_adapter(state->cec_adap); +} + +static const struct v4l2_subdev_internal_ops adv7511_int_ops = { + .registered = adv7511_registered, + .unregistered = adv7511_unregistered, +}; + /* ----------------------------------------------------------------------- */ /* Setup ADV7511 */ static void adv7511_init_setup(struct v4l2_subdev *sd) { struct adv7511_state *state = get_adv7511_state(sd); struct adv7511_state_edid *edid = &state->edid; + u32 cec_clk = state->pdata.cec_clk; + u8 ratio; v4l2_dbg(1, debug, sd, "%s\n", __func__); /* clear all interrupts */ adv7511_wr(sd, 0x96, 0xff); + adv7511_wr(sd, 0x97, 0xff); /* * Stop HPD from resetting a lot of registers. 
* It might leave the chip in a partly un-initialized state, @@ -1442,6 +1761,25 @@ static void adv7511_init_setup(struct v4l2_subdev *sd) adv7511_set_isr(sd, false); adv7511_s_stream(sd, false); adv7511_s_audio_stream(sd, false); + + if (state->i2c_cec == NULL) + return; + + v4l2_dbg(1, debug, sd, "%s: cec_clk %d\n", __func__, cec_clk); + + /* cec soft reset */ + adv7511_cec_write(sd, 0x50, 0x01); + adv7511_cec_write(sd, 0x50, 0x00); + + /* legacy mode */ + adv7511_cec_write(sd, 0x4a, 0x00); + + if (cec_clk % 750000 != 0) + v4l2_err(sd, "%s: cec_clk %d, not multiple of 750 Khz\n", + __func__, cec_clk); + + ratio = (cec_clk / 750000) - 1; + adv7511_cec_write(sd, 0x4e, ratio << 2); } static int adv7511_probe(struct i2c_client *client, const struct i2c_device_id *id) @@ -1476,6 +1814,7 @@ static int adv7511_probe(struct i2c_client *client, const struct i2c_device_id * client->addr << 1); v4l2_i2c_subdev_init(sd, client, &adv7511_ops); + sd->internal_ops = &adv7511_int_ops; hdl = &state->hdl; v4l2_ctrl_handler_init(hdl, 10); @@ -1516,26 +1855,47 @@ static int adv7511_probe(struct i2c_client *client, const struct i2c_device_id * chip_id[0] = adv7511_rd(sd, 0xf5); chip_id[1] = adv7511_rd(sd, 0xf6); if (chip_id[0] != 0x75 || chip_id[1] != 0x11) { - v4l2_err(sd, "chip_id != 0x7511, read 0x%02x%02x\n", chip_id[0], chip_id[1]); + v4l2_err(sd, "chip_id != 0x7511, read 0x%02x%02x\n", chip_id[0], + chip_id[1]); err = -EIO; goto err_entity; } - state->i2c_edid = i2c_new_dummy(client->adapter, state->i2c_edid_addr >> 1); + state->i2c_edid = i2c_new_dummy(client->adapter, + state->i2c_edid_addr >> 1); if (state->i2c_edid == NULL) { v4l2_err(sd, "failed to register edid i2c client\n"); err = -ENOMEM; goto err_entity; } + adv7511_wr(sd, 0xe1, state->i2c_cec_addr); + if (state->pdata.cec_clk < 3000000 || + state->pdata.cec_clk > 100000000) { + v4l2_err(sd, "%s: cec_clk %u outside range, disabling cec\n", + __func__, state->pdata.cec_clk); + state->pdata.cec_clk = 0; + } + + if (state->pdata.cec_clk) { + state->i2c_cec = i2c_new_dummy(client->adapter, + state->i2c_cec_addr >> 1); + if (state->i2c_cec == NULL) { + v4l2_err(sd, "failed to register cec i2c client\n"); + goto err_unreg_edid; + } + adv7511_wr(sd, 0xe2, 0x00); /* power up cec section */ + } else { + adv7511_wr(sd, 0xe2, 0x01); /* power down cec section */ + } + state->i2c_pktmem = i2c_new_dummy(client->adapter, state->i2c_pktmem_addr >> 1); if (state->i2c_pktmem == NULL) { v4l2_err(sd, "failed to register pktmem i2c client\n"); err = -ENOMEM; - goto err_unreg_edid; + goto err_unreg_cec; } - adv7511_wr(sd, 0xe2, 0x01); /* power down cec section */ state->work_queue = create_singlethread_workqueue(sd->name); if (state->work_queue == NULL) { v4l2_err(sd, "could not create workqueue\n"); @@ -1546,6 +1906,19 @@ static int adv7511_probe(struct i2c_client *client, const struct i2c_device_id * INIT_DELAYED_WORK(&state->edid_handler, adv7511_edid_handler); adv7511_init_setup(sd); + +#if IS_ENABLED(CONFIG_VIDEO_ADV7511_CEC) + state->cec_adap = cec_allocate_adapter(&adv7511_cec_adap_ops, + state, dev_name(&client->dev), CEC_CAP_TRANSMIT | + CEC_CAP_LOG_ADDRS | CEC_CAP_PASSTHROUGH | CEC_CAP_RC, + ADV7511_MAX_ADDRS, &client->dev); + err = PTR_ERR_OR_ZERO(state->cec_adap); + if (err) { + destroy_workqueue(state->work_queue); + goto err_unreg_pktmem; + } +#endif + adv7511_set_isr(sd, true); adv7511_check_monitor_present_status(sd); @@ -1555,6 +1928,9 @@ static int adv7511_probe(struct i2c_client *client, const struct i2c_device_id * err_unreg_pktmem: 
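/* unwind the dummy i2c clients in reverse order of creation */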
i2c_unregister_device(state->i2c_pktmem); +err_unreg_cec: + if (state->i2c_cec) + i2c_unregister_device(state->i2c_cec); err_unreg_edid: i2c_unregister_device(state->i2c_edid); err_entity: @@ -1576,9 +1952,12 @@ static int adv7511_remove(struct i2c_client *client) v4l2_dbg(1, debug, sd, "%s removed @ 0x%x (%s)\n", client->name, client->addr << 1, client->adapter->name); + adv7511_set_isr(sd, false); adv7511_init_setup(sd); cancel_delayed_work(&state->edid_handler); i2c_unregister_device(state->i2c_edid); + if (state->i2c_cec) + i2c_unregister_device(state->i2c_cec); i2c_unregister_device(state->i2c_pktmem); destroy_workqueue(state->work_queue); v4l2_device_unregister_subdev(sd); diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index beb2841ceae5..4003831de712 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -80,6 +81,8 @@ MODULE_LICENSE("GPL"); #define ADV76XX_OP_SWAP_CB_CR (1 << 0) +#define ADV76XX_MAX_ADDRS (3) + enum adv76xx_type { ADV7604, ADV7611, @@ -164,6 +167,7 @@ struct adv76xx_state { struct adv76xx_platform_data pdata; struct gpio_desc *hpd_gpio[4]; + struct gpio_desc *reset_gpio; struct v4l2_subdev sd; struct media_pad pads[ADV76XX_PAD_MAX]; @@ -184,10 +188,15 @@ struct adv76xx_state { u16 spa_port_a[2]; struct v4l2_fract aspect_ratio; u32 rgb_quantization_range; - struct workqueue_struct *work_queues; struct delayed_work delayed_work_enable_hotplug; bool restart_stdi_once; + /* CEC */ + struct cec_adapter *cec_adap; + u8 cec_addr[ADV76XX_MAX_ADDRS]; + u8 cec_valid_addrs; + bool cec_enabled_adap; + /* i2c clients */ struct i2c_client *i2c_clients[ADV76XX_PAGE_MAX]; @@ -381,7 +390,8 @@ static inline int io_write(struct v4l2_subdev *sd, u8 reg, u8 val) return regmap_write(state->regmap[ADV76XX_PAGE_IO], reg, val); } -static inline int io_write_clr_set(struct v4l2_subdev *sd, u8 reg, u8 mask, u8 val) +static inline int io_write_clr_set(struct v4l2_subdev *sd, u8 reg, u8 mask, + u8 val) { return io_write(sd, reg, (io_read(sd, reg) & ~mask) | val); } @@ -414,6 +424,12 @@ static inline int cec_write(struct v4l2_subdev *sd, u8 reg, u8 val) return regmap_write(state->regmap[ADV76XX_PAGE_CEC], reg, val); } +static inline int cec_write_clr_set(struct v4l2_subdev *sd, u8 reg, u8 mask, + u8 val) +{ + return cec_write(sd, reg, (cec_read(sd, reg) & ~mask) | val); +} + static inline int infoframe_read(struct v4l2_subdev *sd, u8 reg) { struct adv76xx_state *state = to_state(sd); @@ -779,11 +795,31 @@ static const struct v4l2_dv_timings_cap adv76xx_timings_cap_digital = { V4L2_DV_BT_CAP_CUSTOM) }; -static inline const struct v4l2_dv_timings_cap * -adv76xx_get_dv_timings_cap(struct v4l2_subdev *sd) +/* + * Return the DV timings capabilities for the requested sink pad. As a special + * case, pad value -1 returns the capabilities for the currently selected input. + */ +static const struct v4l2_dv_timings_cap * +adv76xx_get_dv_timings_cap(struct v4l2_subdev *sd, int pad) { - return is_digital_input(sd) ? 
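/* the HDMI pads use the digital caps, the VGA pads the analog ones */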
&adv76xx_timings_cap_digital : - &adv7604_timings_cap_analog; + if (pad == -1) { + struct adv76xx_state *state = to_state(sd); + + pad = state->selected_input; + } + + switch (pad) { + case ADV76XX_PAD_HDMI_PORT_A: + case ADV7604_PAD_HDMI_PORT_B: + case ADV7604_PAD_HDMI_PORT_C: + case ADV7604_PAD_HDMI_PORT_D: + return &adv76xx_timings_cap_digital; + + case ADV7604_PAD_VGA_RGB: + case ADV7604_PAD_VGA_COMP: + default: + return &adv7604_timings_cap_analog; + } } @@ -872,9 +908,9 @@ static int adv76xx_s_detect_tx_5v_ctrl(struct v4l2_subdev *sd) { struct adv76xx_state *state = to_state(sd); const struct adv76xx_chip_info *info = state->info; + u16 cable_det = info->read_cable_det(sd); - return v4l2_ctrl_s_ctrl(state->detect_tx_5v_ctrl, - info->read_cable_det(sd)); + return v4l2_ctrl_s_ctrl(state->detect_tx_5v_ctrl, cable_det); } static int find_and_set_predefined_video_timings(struct v4l2_subdev *sd, @@ -1066,6 +1102,10 @@ static void set_rgb_quantization_range(struct v4l2_subdev *sd) struct adv76xx_state *state = to_state(sd); bool rgb_output = io_read(sd, 0x02) & 0x02; bool hdmi_signal = hdmi_read(sd, 0x05) & 0x80; + u8 y = HDMI_COLORSPACE_RGB; + + if (hdmi_signal && (io_read(sd, 0x60) & 1)) + y = infoframe_read(sd, 0x01) >> 5; v4l2_dbg(2, debug, sd, "%s: RGB quantization range: %d, RGB out: %d, HDMI: %d\n", __func__, state->rgb_quantization_range, @@ -1073,6 +1113,7 @@ static void set_rgb_quantization_range(struct v4l2_subdev *sd) adv76xx_set_gain(sd, true, 0x0, 0x0, 0x0); adv76xx_set_offset(sd, true, 0x0, 0x0, 0x0); + io_write_clr_set(sd, 0x02, 0x04, rgb_output ? 0 : 4); switch (state->rgb_quantization_range) { case V4L2_DV_RGB_RANGE_AUTO: @@ -1122,6 +1163,9 @@ static void set_rgb_quantization_range(struct v4l2_subdev *sd) break; } + if (y != HDMI_COLORSPACE_RGB) + break; + /* RGB limited range (16-235) */ io_write_clr_set(sd, 0x02, 0xf0, 0x00); @@ -1133,6 +1177,9 @@ static void set_rgb_quantization_range(struct v4l2_subdev *sd) break; } + if (y != HDMI_COLORSPACE_RGB) + break; + /* RGB full range (0-255) */ io_write_clr_set(sd, 0x02, 0xf0, 0x10); @@ -1329,7 +1376,7 @@ static int stdi2dv_timings(struct v4l2_subdev *sd, const struct v4l2_bt_timings *bt = &v4l2_dv_timings_presets[i].bt; if (!v4l2_valid_dv_timings(&v4l2_dv_timings_presets[i], - adv76xx_get_dv_timings_cap(sd), + adv76xx_get_dv_timings_cap(sd, -1), adv76xx_check_dv_timings, NULL)) continue; if (vtotal(bt) != stdi->lcf + 1) @@ -1430,18 +1477,22 @@ static int adv76xx_enum_dv_timings(struct v4l2_subdev *sd, return -EINVAL; return v4l2_enum_dv_timings_cap(timings, - adv76xx_get_dv_timings_cap(sd), adv76xx_check_dv_timings, NULL); + adv76xx_get_dv_timings_cap(sd, timings->pad), + adv76xx_check_dv_timings, NULL); } static int adv76xx_dv_timings_cap(struct v4l2_subdev *sd, struct v4l2_dv_timings_cap *cap) { struct adv76xx_state *state = to_state(sd); + unsigned int pad = cap->pad; if (cap->pad >= state->source_pad) return -EINVAL; - *cap = *adv76xx_get_dv_timings_cap(sd); + *cap = *adv76xx_get_dv_timings_cap(sd, pad); + cap->pad = pad; + return 0; } @@ -1450,9 +1501,9 @@ static int adv76xx_dv_timings_cap(struct v4l2_subdev *sd, static void adv76xx_fill_optional_dv_timings_fields(struct v4l2_subdev *sd, struct v4l2_dv_timings *timings) { - v4l2_find_dv_timings_cap(timings, adv76xx_get_dv_timings_cap(sd), - is_digital_input(sd) ? 250000 : 1000000, - adv76xx_check_dv_timings, NULL); + v4l2_find_dv_timings_cap(timings, adv76xx_get_dv_timings_cap(sd, -1), + is_digital_input(sd) ? 
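/* pixelclock tolerance: 250 kHz on digital inputs, 1 MHz on analog */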
250000 : 1000000, + adv76xx_check_dv_timings, NULL); } static unsigned int adv7604_read_hdmi_pixelclock(struct v4l2_subdev *sd) @@ -1620,7 +1671,7 @@ static int adv76xx_s_dv_timings(struct v4l2_subdev *sd, bt = &timings->bt; - if (!v4l2_valid_dv_timings(timings, adv76xx_get_dv_timings_cap(sd), + if (!v4l2_valid_dv_timings(timings, adv76xx_get_dv_timings_cap(sd, -1), adv76xx_check_dv_timings, NULL)) return -ERANGE; @@ -1825,6 +1876,7 @@ static void adv76xx_setup_format(struct adv76xx_state *state) io_write_clr_set(sd, 0x04, 0xe0, adv76xx_op_ch_sel(state)); io_write_clr_set(sd, 0x05, 0x01, state->format->swap_cb_cr ? ADV76XX_OP_SWAP_CB_CR : 0); + set_rgb_quantization_range(sd); } static int adv76xx_get_format(struct v4l2_subdev *sd, @@ -1900,6 +1952,210 @@ static int adv76xx_set_format(struct v4l2_subdev *sd, return 0; } +#if IS_ENABLED(CONFIG_VIDEO_ADV7604_CEC) +static void adv76xx_cec_tx_raw_status(struct v4l2_subdev *sd, u8 tx_raw_status) +{ + struct adv76xx_state *state = to_state(sd); + + if ((cec_read(sd, 0x11) & 0x01) == 0) { + v4l2_dbg(1, debug, sd, "%s: tx raw: tx disabled\n", __func__); + return; + } + + if (tx_raw_status & 0x02) { + v4l2_dbg(1, debug, sd, "%s: tx raw: arbitration lost\n", + __func__); + cec_transmit_done(state->cec_adap, CEC_TX_STATUS_ARB_LOST, + 1, 0, 0, 0); + } + if (tx_raw_status & 0x04) { + u8 status; + u8 nack_cnt; + u8 low_drive_cnt; + + v4l2_dbg(1, debug, sd, "%s: tx raw: retry failed\n", __func__); + /* + * We set this status bit since this hardware performs + * retransmissions. + */ + status = CEC_TX_STATUS_MAX_RETRIES; + nack_cnt = cec_read(sd, 0x14) & 0xf; + if (nack_cnt) + status |= CEC_TX_STATUS_NACK; + low_drive_cnt = cec_read(sd, 0x14) >> 4; + if (low_drive_cnt) + status |= CEC_TX_STATUS_LOW_DRIVE; + cec_transmit_done(state->cec_adap, status, + 0, nack_cnt, low_drive_cnt, 0); + return; + } + if (tx_raw_status & 0x01) { + v4l2_dbg(1, debug, sd, "%s: tx raw: ready ok\n", __func__); + cec_transmit_done(state->cec_adap, CEC_TX_STATUS_OK, 0, 0, 0, 0); + return; + } +} + +static void adv76xx_cec_isr(struct v4l2_subdev *sd, bool *handled) +{ + struct adv76xx_state *state = to_state(sd); + u8 cec_irq; + + /* cec controller */ + cec_irq = io_read(sd, 0x4d) & 0x0f; + if (!cec_irq) + return; + + v4l2_dbg(1, debug, sd, "%s: cec: irq 0x%x\n", __func__, cec_irq); + adv76xx_cec_tx_raw_status(sd, cec_irq); + if (cec_irq & 0x08) { + struct cec_msg msg; + + msg.len = cec_read(sd, 0x25) & 0x1f; + if (msg.len > 16) + msg.len = 16; + + if (msg.len) { + u8 i; + + for (i = 0; i < msg.len; i++) + msg.msg[i] = cec_read(sd, i + 0x15); + cec_write(sd, 0x26, 0x01); /* re-enable rx */ + cec_received_msg(state->cec_adap, &msg); + } + } + + /* note: the bit order is swapped between 0x4d and 0x4e */ + cec_irq = ((cec_irq & 0x08) >> 3) | ((cec_irq & 0x04) >> 1) | + ((cec_irq & 0x02) << 1) | ((cec_irq & 0x01) << 3); + io_write(sd, 0x4e, cec_irq); + + if (handled) + *handled = true; +} + +static int adv76xx_cec_adap_enable(struct cec_adapter *adap, bool enable) +{ + struct adv76xx_state *state = adap->priv; + struct v4l2_subdev *sd = &state->sd; + + if (!state->cec_enabled_adap && enable) { + cec_write_clr_set(sd, 0x2a, 0x01, 0x01); /* power up cec */ + cec_write(sd, 0x2c, 0x01); /* cec soft reset */ + cec_write_clr_set(sd, 0x11, 0x01, 0); /* initially disable tx */ + /* enabled irqs: */ + /* tx: ready */ + /* tx: arbitration lost */ + /* tx: retry timeout */ + /* rx: ready */ + io_write_clr_set(sd, 0x50, 0x0f, 0x0f); + cec_write(sd, 0x26, 0x01); /* enable rx */ + } else if 
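/* tear down only when the adapter was actually enabled */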
(state->cec_enabled_adap && !enable) { + /* disable cec interrupts */ + io_write_clr_set(sd, 0x50, 0x0f, 0x00); + /* disable address mask 1-3 */ + cec_write_clr_set(sd, 0x27, 0x70, 0x00); + /* power down cec section */ + cec_write_clr_set(sd, 0x2a, 0x01, 0x00); + state->cec_valid_addrs = 0; + } + state->cec_enabled_adap = enable; + adv76xx_s_detect_tx_5v_ctrl(sd); + return 0; +} + +static int adv76xx_cec_adap_log_addr(struct cec_adapter *adap, u8 addr) +{ + struct adv76xx_state *state = adap->priv; + struct v4l2_subdev *sd = &state->sd; + unsigned int i, free_idx = ADV76XX_MAX_ADDRS; + + if (!state->cec_enabled_adap) + return addr == CEC_LOG_ADDR_INVALID ? 0 : -EIO; + + if (addr == CEC_LOG_ADDR_INVALID) { + cec_write_clr_set(sd, 0x27, 0x70, 0); + state->cec_valid_addrs = 0; + return 0; + } + + for (i = 0; i < ADV76XX_MAX_ADDRS; i++) { + bool is_valid = state->cec_valid_addrs & (1 << i); + + if (free_idx == ADV76XX_MAX_ADDRS && !is_valid) + free_idx = i; + if (is_valid && state->cec_addr[i] == addr) + return 0; + } + if (i == ADV76XX_MAX_ADDRS) { + i = free_idx; + if (i == ADV76XX_MAX_ADDRS) + return -ENXIO; + } + state->cec_addr[i] = addr; + state->cec_valid_addrs |= 1 << i; + + switch (i) { + case 0: + /* enable address mask 0 */ + cec_write_clr_set(sd, 0x27, 0x10, 0x10); + /* set address for mask 0 */ + cec_write_clr_set(sd, 0x28, 0x0f, addr); + break; + case 1: + /* enable address mask 1 */ + cec_write_clr_set(sd, 0x27, 0x20, 0x20); + /* set address for mask 1 */ + cec_write_clr_set(sd, 0x28, 0xf0, addr << 4); + break; + case 2: + /* enable address mask 2 */ + cec_write_clr_set(sd, 0x27, 0x40, 0x40); + /* set address for mask 1 */ + cec_write_clr_set(sd, 0x29, 0x0f, addr); + break; + } + return 0; +} + +static int adv76xx_cec_adap_transmit(struct cec_adapter *adap, u8 attempts, + u32 signal_free_time, struct cec_msg *msg) +{ + struct adv76xx_state *state = adap->priv; + struct v4l2_subdev *sd = &state->sd; + u8 len = msg->len; + unsigned int i; + + /* + * The number of retries is the number of attempts - 1, but retry + * at least once. It's not clear if a value of 0 is allowed, so + * let's do at least one retry. 
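+ * The signal_free_time argument is not programmed here; the hardware presumably applies its own signal-free timing.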
+ */ + cec_write_clr_set(sd, 0x12, 0x70, max(1, attempts - 1) << 4); + + if (len > 16) { + v4l2_err(sd, "%s: len exceeded 16 (%d)\n", __func__, len); + return -EINVAL; + } + + /* write data */ + for (i = 0; i < len; i++) + cec_write(sd, i, msg->msg[i]); + + /* set length (data + header) */ + cec_write(sd, 0x10, len); + /* start transmit, enable tx */ + cec_write(sd, 0x11, 0x01); + return 0; +} + +static const struct cec_adap_ops adv76xx_cec_adap_ops = { + .adap_enable = adv76xx_cec_adap_enable, + .adap_log_addr = adv76xx_cec_adap_log_addr, + .adap_transmit = adv76xx_cec_adap_transmit, +}; +#endif + static int adv76xx_isr(struct v4l2_subdev *sd, u32 status, bool *handled) { struct adv76xx_state *state = to_state(sd); @@ -1945,6 +2201,11 @@ static int adv76xx_isr(struct v4l2_subdev *sd, u32 status, bool *handled) *handled = true; } +#if IS_ENABLED(CONFIG_VIDEO_ADV7604_CEC) + /* cec */ + adv76xx_cec_isr(sd, handled); +#endif + /* tx 5v detect */ tx_5v = irq_reg_0x70 & info->cable_det_mask; if (tx_5v) { @@ -1994,39 +2255,12 @@ static int adv76xx_get_edid(struct v4l2_subdev *sd, struct v4l2_edid *edid) return 0; } -static int get_edid_spa_location(const u8 *edid) -{ - u8 d; - - if ((edid[0x7e] != 1) || - (edid[0x80] != 0x02) || - (edid[0x81] != 0x03)) { - return -1; - } - - /* search Vendor Specific Data Block (tag 3) */ - d = edid[0x82] & 0x7f; - if (d > 4) { - int i = 0x84; - int end = 0x80 + d; - - do { - u8 tag = edid[i] >> 5; - u8 len = edid[i] & 0x1f; - - if ((tag == 3) && (len >= 5)) - return i + 4; - i += len + 1; - } while (i < end); - } - return -1; -} - static int adv76xx_set_edid(struct v4l2_subdev *sd, struct v4l2_edid *edid) { struct adv76xx_state *state = to_state(sd); const struct adv76xx_chip_info *info = state->info; - int spa_loc; + unsigned int spa_loc; + u16 pa; int err; int i; @@ -2057,6 +2291,10 @@ static int adv76xx_set_edid(struct v4l2_subdev *sd, struct v4l2_edid *edid) edid->blocks = 2; return -E2BIG; } + pa = cec_get_edid_phys_addr(edid->edid, edid->blocks * 128, &spa_loc); + err = cec_phys_addr_validate(pa, &pa, NULL); + if (err) + return err; v4l2_dbg(2, debug, sd, "%s: write EDID pad %d, edid.present = 0x%x\n", __func__, edid->pad, state->edid.present); @@ -2066,9 +2304,12 @@ static int adv76xx_set_edid(struct v4l2_subdev *sd, struct v4l2_edid *edid) adv76xx_set_hpd(state, 0); rep_write_clr_set(sd, info->edid_enable_reg, 0x0f, 0x00); - spa_loc = get_edid_spa_location(edid->edid); - if (spa_loc < 0) - spa_loc = 0xc0; /* Default value [REF_02, p. 116] */ + /* + * Return an error if no location of the source physical address + * was found. + */ + if (spa_loc == 0) + return -EINVAL; switch (edid->pad) { case ADV76XX_PAD_HDMI_PORT_A: @@ -2128,10 +2369,10 @@ static int adv76xx_set_edid(struct v4l2_subdev *sd, struct v4l2_edid *edid) v4l2_err(sd, "error enabling edid (0x%x)\n", state->edid.present); return -EIO; } + cec_s_phys_addr(state->cec_adap, pa, false); /* enable hotplug after 100 ms */ - queue_delayed_work(state->work_queues, - &state->delayed_work_enable_hotplug, HZ / 10); + schedule_delayed_work(&state->delayed_work_enable_hotplug, HZ / 10); return 0; } @@ -2252,8 +2493,19 @@ static int adv76xx_log_status(struct v4l2_subdev *sd) ((edid_enabled & 0x02) ? "Yes" : "No"), ((edid_enabled & 0x04) ? "Yes" : "No"), ((edid_enabled & 0x08) ? "Yes" : "No")); - v4l2_info(sd, "CEC: %s\n", !!(cec_read(sd, 0x2a) & 0x01) ? + v4l2_info(sd, "CEC: %s\n", state->cec_enabled_adap ? 
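/* report the driver's cached adapter state instead of reading register 0x2a */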
"enabled" : "disabled"); + if (state->cec_enabled_adap) { + int i; + + for (i = 0; i < ADV76XX_MAX_ADDRS; i++) { + bool is_valid = state->cec_valid_addrs & (1 << i); + + if (is_valid) + v4l2_info(sd, "CEC Logical Address: 0x%x\n", + state->cec_addr[i]); + } + } v4l2_info(sd, "-----Signal status-----\n"); cable_det = info->read_cable_det(sd); @@ -2299,11 +2551,10 @@ static int adv76xx_log_status(struct v4l2_subdev *sd) rgb_quantization_range_txt[state->rgb_quantization_range]); v4l2_info(sd, "Input color space: %s\n", input_color_space_txt[reg_io_0x02 >> 4]); - v4l2_info(sd, "Output color space: %s %s, saturator %s, alt-gamma %s\n", + v4l2_info(sd, "Output color space: %s %s, alt-gamma %s\n", (reg_io_0x02 & 0x02) ? "RGB" : "YCbCr", - (reg_io_0x02 & 0x04) ? "(16-235)" : "(0-255)", (((reg_io_0x02 >> 2) & 0x01) ^ (reg_io_0x02 & 0x01)) ? - "enabled" : "disabled", + "(16-235)" : "(0-255)", (reg_io_0x02 & 0x08) ? "enabled" : "disabled"); v4l2_info(sd, "Color space conversion: %s\n", csc_coeff_sel_rb[cp_read(sd, info->cp_csc) >> 4]); @@ -2363,6 +2614,24 @@ static int adv76xx_subscribe_event(struct v4l2_subdev *sd, } } +static int adv76xx_registered(struct v4l2_subdev *sd) +{ + struct adv76xx_state *state = to_state(sd); + int err; + + err = cec_register_adapter(state->cec_adap); + if (err) + cec_delete_adapter(state->cec_adap); + return err; +} + +static void adv76xx_unregistered(struct v4l2_subdev *sd) +{ + struct adv76xx_state *state = to_state(sd); + + cec_unregister_adapter(state->cec_adap); +} + /* ----------------------------------------------------------------------- */ static const struct v4l2_ctrl_ops adv76xx_ctrl_ops = { @@ -2406,6 +2675,11 @@ static const struct v4l2_subdev_ops adv76xx_ops = { .pad = &adv76xx_pad_ops, }; +static const struct v4l2_subdev_internal_ops adv76xx_int_ops = { + .registered = adv76xx_registered, + .unregistered = adv76xx_unregistered, +}; + /* -------------------------- custom ctrls ---------------------------------- */ static const struct v4l2_ctrl_config adv7604_ctrl_analog_sampling_phase = { @@ -2468,10 +2742,7 @@ static int adv76xx_core_init(struct v4l2_subdev *sd) cp_write(sd, 0xcf, 0x01); /* Power down macrovision */ /* video format */ - io_write_clr_set(sd, 0x02, 0x0f, - pdata->alt_gamma << 3 | - pdata->op_656_range << 2 | - pdata->alt_data_sat << 0); + io_write_clr_set(sd, 0x02, 0x0f, pdata->alt_gamma << 3); io_write_clr_set(sd, 0x05, 0x0e, pdata->blank_data << 3 | pdata->insert_av_codes << 2 | pdata->replicate_av_codes << 1); @@ -2821,10 +3092,8 @@ static int adv76xx_parse_dt(struct adv76xx_state *state) if (flags & V4L2_MBUS_PCLK_SAMPLE_RISING) state->pdata.inv_llc_pol = 1; - if (bus_cfg.bus_type == V4L2_MBUS_BT656) { + if (bus_cfg.bus_type == V4L2_MBUS_BT656) state->pdata.insert_av_codes = 1; - state->pdata.op_656_range = 1; - } /* Disable the interrupt for now as no DT-based board uses it. */ state->pdata.int1_config = ADV76XX_INT1_CONFIG_DISABLED; @@ -2847,7 +3116,6 @@ static int adv76xx_parse_dt(struct adv76xx_state *state) state->pdata.disable_pwrdnb = 0; state->pdata.disable_cable_det_rst = 0; state->pdata.blank_data = 1; - state->pdata.alt_data_sat = 1; state->pdata.op_format_mode_sel = ADV7604_OP_FORMAT_MODE0; state->pdata.bus_order = ADV7604_BUS_ORDER_RGB; @@ -2996,6 +3264,19 @@ static int configure_regmaps(struct adv76xx_state *state) return 0; } +static void adv76xx_reset(struct adv76xx_state *state) +{ + if (state->reset_gpio) { + /* ADV76XX can be reset by a low reset pulse of minimum 5 ms. 
*/ + gpiod_set_value_cansleep(state->reset_gpio, 0); + usleep_range(5000, 10000); + gpiod_set_value_cansleep(state->reset_gpio, 1); + /* It is recommended to wait 5 ms after the low pulse before */ + /* an I2C write is performed to the ADV76XX. */ + usleep_range(5000, 10000); + } +} + static int adv76xx_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -3059,6 +3340,12 @@ static int adv76xx_probe(struct i2c_client *client, if (state->hpd_gpio[i]) v4l_info(client, "Handling HPD %u GPIO\n", i); } + state->reset_gpio = devm_gpiod_get_optional(&client->dev, "reset", + GPIOD_OUT_HIGH); + if (IS_ERR(state->reset_gpio)) + return PTR_ERR(state->reset_gpio); + + adv76xx_reset(state); state->timings = cea640x480; state->format = adv76xx_format_info(state, MEDIA_BUS_FMT_YUYV8_2X8); @@ -3069,6 +3356,7 @@ static int adv76xx_probe(struct i2c_client *client, id->name, i2c_adapter_id(client->adapter), client->addr); sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE | V4L2_SUBDEV_FL_HAS_EVENTS; + sd->internal_ops = &adv76xx_int_ops; /* Configure IO Regmap region */ err = configure_regmap(state, ADV76XX_PAGE_IO); @@ -3182,14 +3470,6 @@ static int adv76xx_probe(struct i2c_client *client, } } - /* work queues */ - state->work_queues = create_singlethread_workqueue(client->name); - if (!state->work_queues) { - v4l2_err(sd, "Could not create work queue\n"); - err = -ENOMEM; - goto err_i2c; - } - INIT_DELAYED_WORK(&state->delayed_work_enable_hotplug, adv76xx_delayed_work_enable_hotplug); @@ -3212,6 +3492,18 @@ static int adv76xx_probe(struct i2c_client *client, err = adv76xx_core_init(sd); if (err) goto err_entity; + +#if IS_ENABLED(CONFIG_VIDEO_ADV7604_CEC) + state->cec_adap = cec_allocate_adapter(&adv76xx_cec_adap_ops, + state, dev_name(&client->dev), + CEC_CAP_TRANSMIT | CEC_CAP_LOG_ADDRS | + CEC_CAP_PASSTHROUGH | CEC_CAP_RC, ADV76XX_MAX_ADDRS, + &client->dev); + err = PTR_ERR_OR_ZERO(state->cec_adap); + if (err) + goto err_entity; +#endif + v4l2_info(sd, "%s found @ 0x%x (%s)\n", client->name, client->addr << 1, client->adapter->name); @@ -3225,7 +3517,6 @@ err_entity: media_entity_cleanup(&sd->entity); err_work_queues: cancel_delayed_work(&state->delayed_work_enable_hotplug); - destroy_workqueue(state->work_queues); err_i2c: adv76xx_unregister_clients(state); err_hdl: @@ -3240,8 +3531,14 @@ static int adv76xx_remove(struct i2c_client *client) struct v4l2_subdev *sd = i2c_get_clientdata(client); struct adv76xx_state *state = to_state(sd); + /* disable interrupts */ + io_write(sd, 0x40, 0); + io_write(sd, 0x41, 0); + io_write(sd, 0x46, 0); + io_write(sd, 0x6e, 0); + io_write(sd, 0x73, 0); + cancel_delayed_work(&state->delayed_work_enable_hotplug); - destroy_workqueue(state->work_queues); v4l2_async_unregister_subdev(sd); media_entity_cleanup(&sd->entity); adv76xx_unregister_clients(to_state(sd)); diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c index ecaacb0a6fa1..8c2a52e280af 100644 --- a/drivers/media/i2c/adv7842.c +++ b/drivers/media/i2c/adv7842.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -79,6 +80,8 @@ MODULE_LICENSE("GPL"); #define ADV7842_OP_SWAP_CB_CR (1 << 0) +#define ADV7842_MAX_ADDRS (3) + /* ********************************************************************** * @@ -118,7 +121,6 @@ struct adv7842_state { struct v4l2_fract aspect_ratio; u32 rgb_quantization_range; bool is_cea_format; - struct workqueue_struct *work_queues; struct delayed_work delayed_work_enable_hotplug; bool restart_stdi_once; bool hdmi_port_a; @@ -142,6 
+144,11 @@ struct adv7842_state { struct v4l2_ctrl *free_run_color_ctrl_manual; struct v4l2_ctrl *free_run_color_ctrl; struct v4l2_ctrl *rgb_quantization_range_ctrl; + + struct cec_adapter *cec_adap; + u8 cec_addr[ADV7842_MAX_ADDRS]; + u8 cec_valid_addrs; + bool cec_enabled_adap; }; /* Unsupported timings. This device cannot support 720p30. */ @@ -418,9 +425,9 @@ static inline int cec_write(struct v4l2_subdev *sd, u8 reg, u8 val) return adv_smbus_write_byte_data(state->i2c_cec, reg, val); } -static inline int cec_write_and_or(struct v4l2_subdev *sd, u8 reg, u8 mask, u8 val) +static inline int cec_write_clr_set(struct v4l2_subdev *sd, u8 reg, u8 mask, u8 val) { - return cec_write(sd, reg, (cec_read(sd, reg) & mask) | val); + return cec_write(sd, reg, (cec_read(sd, reg) & ~mask) | val); } static inline int infoframe_read(struct v4l2_subdev *sd, u8 reg) @@ -696,6 +703,18 @@ adv7842_get_dv_timings_cap(struct v4l2_subdev *sd) /* ----------------------------------------------------------------------- */ +static u16 adv7842_read_cable_det(struct v4l2_subdev *sd) +{ + u8 reg = io_read(sd, 0x6f); + u16 val = 0; + + if (reg & 0x02) + val |= 1; /* port A */ + if (reg & 0x01) + val |= 2; /* port B */ + return val; +} + static void adv7842_delayed_work_enable_hotplug(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); @@ -756,56 +775,23 @@ static int edid_write_vga_segment(struct v4l2_subdev *sd) } /* enable hotplug after 200 ms */ - queue_delayed_work(state->work_queues, - &state->delayed_work_enable_hotplug, HZ / 5); + schedule_delayed_work(&state->delayed_work_enable_hotplug, HZ / 5); return 0; } -static int edid_spa_location(const u8 *edid) -{ - u8 d; - - /* - * TODO, improve and update for other CEA extensions - * currently only for 1 segment (256 bytes), - * i.e. 1 extension block and CEA revision 3. - */ - if ((edid[0x7e] != 1) || - (edid[0x80] != 0x02) || - (edid[0x81] != 0x03)) { - return -EINVAL; - } - /* - * search Vendor Specific Data Block (tag 3) - */ - d = edid[0x82] & 0x7f; - if (d > 4) { - int i = 0x84; - int end = 0x80 + d; - do { - u8 tag = edid[i]>>5; - u8 len = edid[i] & 0x1f; - - if ((tag == 3) && (len >= 5)) - return i + 4; - i += len + 1; - } while (i < end); - } - return -EINVAL; -} - static int edid_write_hdmi_segment(struct v4l2_subdev *sd, u8 port) { struct i2c_client *client = v4l2_get_subdevdata(sd); struct adv7842_state *state = to_state(sd); - const u8 *val = state->hdmi_edid.edid; - int spa_loc = edid_spa_location(val); + const u8 *edid = state->hdmi_edid.edid; + int spa_loc; + u16 pa; int err = 0; int i; - v4l2_dbg(2, debug, sd, "%s: write EDID on port %c (spa at 0x%x)\n", - __func__, (port == ADV7842_EDID_PORT_A) ? 'A' : 'B', spa_loc); + v4l2_dbg(2, debug, sd, "%s: write EDID on port %c\n", + __func__, (port == ADV7842_EDID_PORT_A) ? 'A' : 'B'); /* HPA disable on port A and B */ io_write_and_or(sd, 0x20, 0xcf, 0x00); @@ -816,24 +802,33 @@ static int edid_write_hdmi_segment(struct v4l2_subdev *sd, u8 port) if (!state->hdmi_edid.present) return 0; + pa = cec_get_edid_phys_addr(edid, 256, &spa_loc); + err = cec_phys_addr_validate(pa, &pa, NULL); + if (err) + return err; + + /* + * Return an error if no location of the source physical address + * was found. 
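+ * cec_get_edid_phys_addr() returns the offset of the SPA through its last argument; an offset of zero means the EDID carried none.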
+ */ + if (spa_loc == 0) + return -EINVAL; + /* edid segment pointer '0' for HDMI ports */ rep_write_and_or(sd, 0x77, 0xef, 0x00); for (i = 0; !err && i < 256; i += I2C_SMBUS_BLOCK_MAX) err = adv_smbus_write_i2c_block_data(state->i2c_edid, i, - I2C_SMBUS_BLOCK_MAX, val + i); + I2C_SMBUS_BLOCK_MAX, edid + i); if (err) return err; - if (spa_loc < 0) - spa_loc = 0xc0; /* Default value [REF_02, p. 199] */ - if (port == ADV7842_EDID_PORT_A) { - rep_write(sd, 0x72, val[spa_loc]); - rep_write(sd, 0x73, val[spa_loc + 1]); + rep_write(sd, 0x72, edid[spa_loc]); + rep_write(sd, 0x73, edid[spa_loc + 1]); } else { - rep_write(sd, 0x74, val[spa_loc]); - rep_write(sd, 0x75, val[spa_loc + 1]); + rep_write(sd, 0x74, edid[spa_loc]); + rep_write(sd, 0x75, edid[spa_loc + 1]); } rep_write(sd, 0x76, spa_loc & 0xff); rep_write_and_or(sd, 0x77, 0xbf, (spa_loc >> 2) & 0x40); @@ -853,10 +848,10 @@ static int edid_write_hdmi_segment(struct v4l2_subdev *sd, u8 port) (port == ADV7842_EDID_PORT_A) ? 'A' : 'B'); return -EIO; } + cec_s_phys_addr(state->cec_adap, pa, false); /* enable hotplug after 200 ms */ - queue_delayed_work(state->work_queues, - &state->delayed_work_enable_hotplug, HZ / 5); + schedule_delayed_work(&state->delayed_work_enable_hotplug, HZ / 5); return 0; } @@ -983,20 +978,11 @@ static int adv7842_s_register(struct v4l2_subdev *sd, static int adv7842_s_detect_tx_5v_ctrl(struct v4l2_subdev *sd) { struct adv7842_state *state = to_state(sd); - int prev = v4l2_ctrl_g_ctrl(state->detect_tx_5v_ctrl); - u8 reg_io_6f = io_read(sd, 0x6f); - int val = 0; + u16 cable_det = adv7842_read_cable_det(sd); - if (reg_io_6f & 0x02) - val |= 1; /* port A */ - if (reg_io_6f & 0x01) - val |= 2; /* port B */ - - v4l2_dbg(1, debug, sd, "%s: 0x%x -> 0x%x\n", __func__, prev, val); + v4l2_dbg(1, debug, sd, "%s: 0x%x\n", __func__, cable_det); - if (val != prev) - return v4l2_ctrl_s_ctrl(state->detect_tx_5v_ctrl, val); - return 0; + return v4l2_ctrl_s_ctrl(state->detect_tx_5v_ctrl, cable_det); } static int find_and_set_predefined_video_timings(struct v4l2_subdev *sd, @@ -1198,6 +1184,10 @@ static void set_rgb_quantization_range(struct v4l2_subdev *sd) struct adv7842_state *state = to_state(sd); bool rgb_output = io_read(sd, 0x02) & 0x02; bool hdmi_signal = hdmi_read(sd, 0x05) & 0x80; + u8 y = HDMI_COLORSPACE_RGB; + + if (hdmi_signal && (io_read(sd, 0x60) & 1)) + y = infoframe_read(sd, 0x01) >> 5; v4l2_dbg(2, debug, sd, "%s: RGB quantization range: %d, RGB out: %d, HDMI: %d\n", __func__, state->rgb_quantization_range, @@ -1205,6 +1195,7 @@ static void set_rgb_quantization_range(struct v4l2_subdev *sd) adv7842_set_gain(sd, true, 0x0, 0x0, 0x0); adv7842_set_offset(sd, true, 0x0, 0x0, 0x0); + io_write_clr_set(sd, 0x02, 0x04, rgb_output ? 0 : 4); switch (state->rgb_quantization_range) { case V4L2_DV_RGB_RANGE_AUTO: @@ -1254,6 +1245,9 @@ static void set_rgb_quantization_range(struct v4l2_subdev *sd) break; } + if (y != HDMI_COLORSPACE_RGB) + break; + /* RGB limited range (16-235) */ io_write_and_or(sd, 0x02, 0x0f, 0x00); @@ -1265,6 +1259,9 @@ static void set_rgb_quantization_range(struct v4l2_subdev *sd) break; } + if (y != HDMI_COLORSPACE_RGB) + break; + /* RGB full range (0-255) */ io_write_and_or(sd, 0x02, 0x0f, 0x10); @@ -2072,6 +2069,7 @@ static void adv7842_setup_format(struct adv7842_state *state) io_write_clr_set(sd, 0x04, 0xe0, adv7842_op_ch_sel(state)); io_write_clr_set(sd, 0x05, 0x01, state->format->swap_cb_cr ? 
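/* bit 0 of IO register 0x05 swaps the Cb/Cr output order */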
ADV7842_OP_SWAP_CB_CR : 0); + set_rgb_quantization_range(sd); } static int adv7842_get_format(struct v4l2_subdev *sd, @@ -2170,6 +2168,207 @@ static void adv7842_irq_enable(struct v4l2_subdev *sd, bool enable) } } +#if IS_ENABLED(CONFIG_VIDEO_ADV7842_CEC) +static void adv7842_cec_tx_raw_status(struct v4l2_subdev *sd, u8 tx_raw_status) +{ + struct adv7842_state *state = to_state(sd); + + if ((cec_read(sd, 0x11) & 0x01) == 0) { + v4l2_dbg(1, debug, sd, "%s: tx raw: tx disabled\n", __func__); + return; + } + + if (tx_raw_status & 0x02) { + v4l2_dbg(1, debug, sd, "%s: tx raw: arbitration lost\n", + __func__); + cec_transmit_done(state->cec_adap, CEC_TX_STATUS_ARB_LOST, + 1, 0, 0, 0); + return; + } + if (tx_raw_status & 0x04) { + u8 status; + u8 nack_cnt; + u8 low_drive_cnt; + + v4l2_dbg(1, debug, sd, "%s: tx raw: retry failed\n", __func__); + /* + * We set this status bit since this hardware performs + * retransmissions. + */ + status = CEC_TX_STATUS_MAX_RETRIES; + nack_cnt = cec_read(sd, 0x14) & 0xf; + if (nack_cnt) + status |= CEC_TX_STATUS_NACK; + low_drive_cnt = cec_read(sd, 0x14) >> 4; + if (low_drive_cnt) + status |= CEC_TX_STATUS_LOW_DRIVE; + cec_transmit_done(state->cec_adap, status, + 0, nack_cnt, low_drive_cnt, 0); + return; + } + if (tx_raw_status & 0x01) { + v4l2_dbg(1, debug, sd, "%s: tx raw: ready ok\n", __func__); + cec_transmit_done(state->cec_adap, CEC_TX_STATUS_OK, 0, 0, 0, 0); + return; + } +} + +static void adv7842_cec_isr(struct v4l2_subdev *sd, bool *handled) +{ + u8 cec_irq; + + /* cec controller */ + cec_irq = io_read(sd, 0x93) & 0x0f; + if (!cec_irq) + return; + + v4l2_dbg(1, debug, sd, "%s: cec: irq 0x%x\n", __func__, cec_irq); + adv7842_cec_tx_raw_status(sd, cec_irq); + if (cec_irq & 0x08) { + struct adv7842_state *state = to_state(sd); + struct cec_msg msg; + + msg.len = cec_read(sd, 0x25) & 0x1f; + if (msg.len > 16) + msg.len = 16; + + if (msg.len) { + u8 i; + + for (i = 0; i < msg.len; i++) + msg.msg[i] = cec_read(sd, i + 0x15); + cec_write(sd, 0x26, 0x01); /* re-enable rx */ + cec_received_msg(state->cec_adap, &msg); + } + } + + io_write(sd, 0x94, cec_irq); + + if (handled) + *handled = true; +} + +static int adv7842_cec_adap_enable(struct cec_adapter *adap, bool enable) +{ + struct adv7842_state *state = adap->priv; + struct v4l2_subdev *sd = &state->sd; + + if (!state->cec_enabled_adap && enable) { + cec_write_clr_set(sd, 0x2a, 0x01, 0x01); /* power up cec */ + cec_write(sd, 0x2c, 0x01); /* cec soft reset */ + cec_write_clr_set(sd, 0x11, 0x01, 0); /* initially disable tx */ + /* enabled irqs: */ + /* tx: ready */ + /* tx: arbitration lost */ + /* tx: retry timeout */ + /* rx: ready */ + io_write_clr_set(sd, 0x96, 0x0f, 0x0f); + cec_write(sd, 0x26, 0x01); /* enable rx */ + } else if (state->cec_enabled_adap && !enable) { + /* disable cec interrupts */ + io_write_clr_set(sd, 0x96, 0x0f, 0x00); + /* disable address mask 1-3 */ + cec_write_clr_set(sd, 0x27, 0x70, 0x00); + /* power down cec section */ + cec_write_clr_set(sd, 0x2a, 0x01, 0x00); + state->cec_valid_addrs = 0; + } + state->cec_enabled_adap = enable; + return 0; +} + +static int adv7842_cec_adap_log_addr(struct cec_adapter *adap, u8 addr) +{ + struct adv7842_state *state = adap->priv; + struct v4l2_subdev *sd = &state->sd; + unsigned int i, free_idx = ADV7842_MAX_ADDRS; + + if (!state->cec_enabled_adap) + return addr == CEC_LOG_ADDR_INVALID ? 
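/* while the adapter is disabled, clearing the address list succeeds but claiming a new address fails */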
0 : -EIO;
+
+	if (addr == CEC_LOG_ADDR_INVALID) {
+		cec_write_clr_set(sd, 0x27, 0x70, 0);
+		state->cec_valid_addrs = 0;
+		return 0;
+	}
+
+	for (i = 0; i < ADV7842_MAX_ADDRS; i++) {
+		bool is_valid = state->cec_valid_addrs & (1 << i);
+
+		if (free_idx == ADV7842_MAX_ADDRS && !is_valid)
+			free_idx = i;
+		if (is_valid && state->cec_addr[i] == addr)
+			return 0;
+	}
+	if (i == ADV7842_MAX_ADDRS) {
+		i = free_idx;
+		if (i == ADV7842_MAX_ADDRS)
+			return -ENXIO;
+	}
+	state->cec_addr[i] = addr;
+	state->cec_valid_addrs |= 1 << i;
+
+	switch (i) {
+	case 0:
+		/* enable address mask 0 */
+		cec_write_clr_set(sd, 0x27, 0x10, 0x10);
+		/* set address for mask 0 */
+		cec_write_clr_set(sd, 0x28, 0x0f, addr);
+		break;
+	case 1:
+		/* enable address mask 1 */
+		cec_write_clr_set(sd, 0x27, 0x20, 0x20);
+		/* set address for mask 1 */
+		cec_write_clr_set(sd, 0x28, 0xf0, addr << 4);
+		break;
+	case 2:
+		/* enable address mask 2 */
+		cec_write_clr_set(sd, 0x27, 0x40, 0x40);
+		/* set address for mask 2 */
+		cec_write_clr_set(sd, 0x29, 0x0f, addr);
+		break;
+	}
+	return 0;
+}
+
+static int adv7842_cec_adap_transmit(struct cec_adapter *adap, u8 attempts,
+				     u32 signal_free_time, struct cec_msg *msg)
+{
+	struct adv7842_state *state = adap->priv;
+	struct v4l2_subdev *sd = &state->sd;
+	u8 len = msg->len;
+	unsigned int i;
+
+	/*
+	 * The number of retries is the number of attempts - 1, but retry
+	 * at least once. It's not clear if a value of 0 is allowed, so
+	 * let's do at least one retry.
+	 */
+	cec_write_clr_set(sd, 0x12, 0x70, max(1, attempts - 1) << 4);
+
+	if (len > 16) {
+		v4l2_err(sd, "%s: len exceeded 16 (%d)\n", __func__, len);
+		return -EINVAL;
+	}
+
+	/* write data */
+	for (i = 0; i < len; i++)
+		cec_write(sd, i, msg->msg[i]);
+
+	/* set length (data + header) */
+	cec_write(sd, 0x10, len);
+	/* start transmit, enable tx */
+	cec_write(sd, 0x11, 0x01);
+	return 0;
+}
+
+static const struct cec_adap_ops adv7842_cec_adap_ops = {
+	.adap_enable = adv7842_cec_adap_enable,
+	.adap_log_addr = adv7842_cec_adap_log_addr,
+	.adap_transmit = adv7842_cec_adap_transmit,
+};
+#endif
+
 static int adv7842_isr(struct v4l2_subdev *sd, u32 status, bool *handled)
 {
 	struct adv7842_state *state = to_state(sd);
@@ -2241,6 +2440,11 @@ static int adv7842_isr(struct v4l2_subdev *sd, u32 status, bool *handled)
 		*handled = true;
 	}
 
+#if IS_ENABLED(CONFIG_VIDEO_ADV7842_CEC)
+	/* cec */
+	adv7842_cec_isr(sd, handled);
+#endif
+
 	/* tx 5v detect */
 	if (irq_status[2] & 0x3) {
 		v4l2_dbg(1, debug, sd, "%s: irq tx_5v\n", __func__);
@@ -2321,10 +2525,12 @@ static int adv7842_set_edid(struct v4l2_subdev *sd, struct v4l2_edid *e)
 	case ADV7842_EDID_PORT_A:
 	case ADV7842_EDID_PORT_B:
 		memset(&state->hdmi_edid.edid, 0, 256);
-		if (e->blocks)
+		if (e->blocks) {
 			state->hdmi_edid.present |= 0x04 << e->pad;
-		else
+		} else {
 			state->hdmi_edid.present &= ~(0x04 << e->pad);
+			adv7842_s_detect_tx_5v_ctrl(sd);
+		}
 		memcpy(&state->hdmi_edid.edid, e->edid, 128 * e->blocks);
 		err = edid_write_hdmi_segment(sd, e->pad);
 		break;
@@ -2397,6 +2603,8 @@ static void adv7842_log_infoframes(struct v4l2_subdev *sd)
 			log_infoframe(sd, &cri[i]);
 }
 
+#if 0
+/* Let's keep it here for now, as it could be useful for debug */
 static const char * const prim_mode_txt[] = {
 	"SDP",
 	"Component",
@@ -2415,6 +2623,7 @@ static const char * const prim_mode_txt[] = {
 	"Reserved",
 	"Reserved",
 };
+#endif
 
 static int adv7842_sdp_log_status(struct v4l2_subdev *sd)
 {
@@ -2509,8 +2718,19 @@ static int adv7842_cp_log_status(struct v4l2_subdev *sd)
 	v4l2_info(sd, "HPD A %s, B %s\n", reg_io_0x21 & 0x02
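/*
 * [Editor's sketch -- not part of the merged patch.]  The tx-raw-status
 * handler above folds the ADV7842's retry counters into one CEC status
 * word.  The decoding in isolation, assuming reg14 holds register 0x14
 * (nack count in the low nibble, low-drive count in the high nibble):
 */
#include <linux/types.h>

static u8 my_tx_status_from_counters(u8 reg14)
{
	/* the hardware already performed the retries for us */
	u8 status = CEC_TX_STATUS_MAX_RETRIES;

	if (reg14 & 0x0f)
		status |= CEC_TX_STATUS_NACK;
	if (reg14 >> 4)
		status |= CEC_TX_STATUS_LOW_DRIVE;
	return status;
}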
? "enabled" : "disabled", reg_io_0x21 & 0x01 ? "enabled" : "disabled"); - v4l2_info(sd, "CEC %s\n", !!(cec_read(sd, 0x2a) & 0x01) ? + v4l2_info(sd, "CEC: %s\n", state->cec_enabled_adap ? "enabled" : "disabled"); + if (state->cec_enabled_adap) { + int i; + + for (i = 0; i < ADV7842_MAX_ADDRS; i++) { + bool is_valid = state->cec_valid_addrs & (1 << i); + + if (is_valid) + v4l2_info(sd, "CEC Logical Address: 0x%x\n", + state->cec_addr[i]); + } + } v4l2_info(sd, "-----Signal status-----\n"); if (state->hdmi_port_a) { @@ -2569,11 +2789,11 @@ static int adv7842_cp_log_status(struct v4l2_subdev *sd) rgb_quantization_range_txt[state->rgb_quantization_range]); v4l2_info(sd, "Input color space: %s\n", input_color_space_txt[reg_io_0x02 >> 4]); - v4l2_info(sd, "Output color space: %s %s, saturator %s\n", + v4l2_info(sd, "Output color space: %s %s, alt-gamma %s\n", (reg_io_0x02 & 0x02) ? "RGB" : "YCbCr", - (reg_io_0x02 & 0x04) ? "(16-235)" : "(0-255)", - ((reg_io_0x02 & 0x04) ^ (reg_io_0x02 & 0x01)) ? - "enabled" : "disabled"); + (((reg_io_0x02 >> 2) & 0x01) ^ (reg_io_0x02 & 0x01)) ? + "(16-235)" : "(0-255)", + (reg_io_0x02 & 0x08) ? "enabled" : "disabled"); v4l2_info(sd, "Color space conversion: %s\n", csc_coeff_sel_rb[cp_read(sd, 0xf4) >> 4]); @@ -2777,11 +2997,7 @@ static int adv7842_core_init(struct v4l2_subdev *sd) io_write(sd, 0x15, 0x80); /* Power up pads */ /* video format */ - io_write(sd, 0x02, - 0xf0 | - pdata->alt_gamma << 3 | - pdata->op_656_range << 2 | - pdata->alt_data_sat << 0); + io_write(sd, 0x02, 0xf0 | pdata->alt_gamma << 3); io_write_and_or(sd, 0x05, 0xf0, pdata->blank_data << 3 | pdata->insert_av_codes << 2 | pdata->replicate_av_codes << 1); @@ -3031,6 +3247,24 @@ static int adv7842_subscribe_event(struct v4l2_subdev *sd, } } +static int adv7842_registered(struct v4l2_subdev *sd) +{ + struct adv7842_state *state = to_state(sd); + int err; + + err = cec_register_adapter(state->cec_adap); + if (err) + cec_delete_adapter(state->cec_adap); + return err; +} + +static void adv7842_unregistered(struct v4l2_subdev *sd) +{ + struct adv7842_state *state = to_state(sd); + + cec_unregister_adapter(state->cec_adap); +} + /* ----------------------------------------------------------------------- */ static const struct v4l2_ctrl_ops adv7842_ctrl_ops = { @@ -3077,6 +3311,11 @@ static const struct v4l2_subdev_ops adv7842_ops = { .pad = &adv7842_pad_ops, }; +static const struct v4l2_subdev_internal_ops adv7842_int_ops = { + .registered = adv7842_registered, + .unregistered = adv7842_unregistered, +}; + /* -------------------------- custom ctrls ---------------------------------- */ static const struct v4l2_ctrl_config adv7842_ctrl_analog_sampling_phase = { @@ -3241,6 +3480,7 @@ static int adv7842_probe(struct i2c_client *client, sd = &state->sd; v4l2_i2c_subdev_init(sd, client, &adv7842_ops); sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE | V4L2_SUBDEV_FL_HAS_EVENTS; + sd->internal_ops = &adv7842_int_ops; state->mode = pdata->mode; state->hdmi_port_a = pdata->input == ADV7842_SELECT_HDMI_PORT_A; @@ -3311,13 +3551,6 @@ static int adv7842_probe(struct i2c_client *client, goto err_i2c; } - /* work queues */ - state->work_queues = create_singlethread_workqueue(client->name); - if (!state->work_queues) { - v4l2_err(sd, "Could not create work queue\n"); - err = -ENOMEM; - goto err_i2c; - } INIT_DELAYED_WORK(&state->delayed_work_enable_hotplug, adv7842_delayed_work_enable_hotplug); @@ -3331,6 +3564,17 @@ static int adv7842_probe(struct i2c_client *client, if (err) goto err_entity; +#if 
IS_ENABLED(CONFIG_VIDEO_ADV7842_CEC) + state->cec_adap = cec_allocate_adapter(&adv7842_cec_adap_ops, + state, dev_name(&client->dev), + CEC_CAP_TRANSMIT | CEC_CAP_LOG_ADDRS | + CEC_CAP_PASSTHROUGH | CEC_CAP_RC, ADV7842_MAX_ADDRS, + &client->dev); + err = PTR_ERR_OR_ZERO(state->cec_adap); + if (err) + goto err_entity; +#endif + v4l2_info(sd, "%s found @ 0x%x (%s)\n", client->name, client->addr << 1, client->adapter->name); return 0; @@ -3339,7 +3583,6 @@ err_entity: media_entity_cleanup(&sd->entity); err_work_queues: cancel_delayed_work(&state->delayed_work_enable_hotplug); - destroy_workqueue(state->work_queues); err_i2c: adv7842_unregister_clients(sd); err_hdl: @@ -3355,9 +3598,7 @@ static int adv7842_remove(struct i2c_client *client) struct adv7842_state *state = to_state(sd); adv7842_irq_enable(sd, false); - cancel_delayed_work(&state->delayed_work_enable_hotplug); - destroy_workqueue(state->work_queues); v4l2_device_unregister_subdev(sd); media_entity_cleanup(&sd->entity); adv7842_unregister_clients(sd); diff --git a/drivers/media/i2c/cs53l32a.c b/drivers/media/i2c/cs53l32a.c index b7e87e38642a..e4b3cf49dd38 100644 --- a/drivers/media/i2c/cs53l32a.c +++ b/drivers/media/i2c/cs53l32a.c @@ -121,13 +121,6 @@ static const struct v4l2_ctrl_ops cs53l32a_ctrl_ops = { static const struct v4l2_subdev_core_ops cs53l32a_core_ops = { .log_status = cs53l32a_log_status, - .g_ext_ctrls = v4l2_subdev_g_ext_ctrls, - .try_ext_ctrls = v4l2_subdev_try_ext_ctrls, - .s_ext_ctrls = v4l2_subdev_s_ext_ctrls, - .g_ctrl = v4l2_subdev_g_ctrl, - .s_ctrl = v4l2_subdev_s_ctrl, - .queryctrl = v4l2_subdev_queryctrl, - .querymenu = v4l2_subdev_querymenu, }; static const struct v4l2_subdev_audio_ops cs53l32a_audio_ops = { diff --git a/drivers/media/i2c/cx25840/cx25840-core.c b/drivers/media/i2c/cx25840/cx25840-core.c index 07a3e7173144..142ae28803bb 100644 --- a/drivers/media/i2c/cx25840/cx25840-core.c +++ b/drivers/media/i2c/cx25840/cx25840-core.c @@ -5042,13 +5042,6 @@ static const struct v4l2_ctrl_ops cx25840_ctrl_ops = { static const struct v4l2_subdev_core_ops cx25840_core_ops = { .log_status = cx25840_log_status, - .g_ctrl = v4l2_subdev_g_ctrl, - .s_ctrl = v4l2_subdev_s_ctrl, - .s_ext_ctrls = v4l2_subdev_s_ext_ctrls, - .try_ext_ctrls = v4l2_subdev_try_ext_ctrls, - .g_ext_ctrls = v4l2_subdev_g_ext_ctrls, - .queryctrl = v4l2_subdev_queryctrl, - .querymenu = v4l2_subdev_querymenu, .reset = cx25840_reset, .load_fw = cx25840_load_fw, .s_io_pin_config = common_s_io_pin_config, diff --git a/drivers/media/i2c/msp3400-driver.c b/drivers/media/i2c/msp3400-driver.c index e016626ebf89..503b7c4f0a9b 100644 --- a/drivers/media/i2c/msp3400-driver.c +++ b/drivers/media/i2c/msp3400-driver.c @@ -642,13 +642,6 @@ static const struct v4l2_ctrl_ops msp_ctrl_ops = { static const struct v4l2_subdev_core_ops msp_core_ops = { .log_status = msp_log_status, - .g_ext_ctrls = v4l2_subdev_g_ext_ctrls, - .try_ext_ctrls = v4l2_subdev_try_ext_ctrls, - .s_ext_ctrls = v4l2_subdev_s_ext_ctrls, - .g_ctrl = v4l2_subdev_g_ctrl, - .s_ctrl = v4l2_subdev_s_ctrl, - .queryctrl = v4l2_subdev_queryctrl, - .querymenu = v4l2_subdev_querymenu, }; static const struct v4l2_subdev_video_ops msp_video_ops = { diff --git a/drivers/media/i2c/mt9t001.c b/drivers/media/i2c/mt9t001.c index 702d562f8e39..842017fa4aab 100644 --- a/drivers/media/i2c/mt9t001.c +++ b/drivers/media/i2c/mt9t001.c @@ -233,10 +233,21 @@ static int __mt9t001_set_power(struct mt9t001 *mt9t001, bool on) ret = mt9t001_reset(mt9t001); if (ret < 0) { dev_err(&client->dev, "Failed to reset the 
camera\n"); - return ret; + goto e_power; } - return v4l2_ctrl_handler_setup(&mt9t001->ctrls); + ret = v4l2_ctrl_handler_setup(&mt9t001->ctrls); + if (ret < 0) { + dev_err(&client->dev, "Failed to set up control handlers\n"); + goto e_power; + } + + return 0; + +e_power: + mt9t001_power_off(mt9t001); + + return ret; } /* ----------------------------------------------------------------------------- @@ -834,7 +845,7 @@ static struct v4l2_subdev_ops mt9t001_subdev_ops = { .pad = &mt9t001_subdev_pad_ops, }; -static struct v4l2_subdev_internal_ops mt9t001_subdev_internal_ops = { +static const struct v4l2_subdev_internal_ops mt9t001_subdev_internal_ops = { .registered = mt9t001_registered, .open = mt9t001_open, .close = mt9t001_close, diff --git a/drivers/media/i2c/mt9v032.c b/drivers/media/i2c/mt9v032.c index 501b37039449..58eb62f1ba21 100644 --- a/drivers/media/i2c/mt9v032.c +++ b/drivers/media/i2c/mt9v032.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -133,9 +132,16 @@ #define MT9V032_TEST_PATTERN_GRAY_DIAGONAL (3 << 11) #define MT9V032_TEST_PATTERN_ENABLE (1 << 13) #define MT9V032_TEST_PATTERN_FLIP (1 << 14) +#define MT9V032_AEGC_DESIRED_BIN 0xa5 +#define MT9V032_AEC_UPDATE_FREQUENCY 0xa6 +#define MT9V032_AEC_LPF 0xa8 +#define MT9V032_AGC_UPDATE_FREQUENCY 0xa9 +#define MT9V032_AGC_LPF 0xaa #define MT9V032_AEC_AGC_ENABLE 0xaf #define MT9V032_AEC_ENABLE (1 << 0) #define MT9V032_AGC_ENABLE (1 << 1) +#define MT9V034_AEC_MAX_SHUTTER_WIDTH 0xad +#define MT9V032_AEC_MAX_SHUTTER_WIDTH 0xbd #define MT9V032_THERMAL_INFO 0xc1 enum mt9v032_model { @@ -162,6 +168,8 @@ struct mt9v032_model_data { unsigned int min_shutter; unsigned int max_shutter; unsigned int pclk_reg; + unsigned int aec_max_shutter_reg; + const struct v4l2_ctrl_config * const aec_max_shutter_v4l2_ctrl; }; struct mt9v032_model_info { @@ -175,63 +183,6 @@ static const struct mt9v032_model_version mt9v032_versions[] = { { MT9V034_CHIP_ID_REV1, "MT9V024/MT9V034 rev1" }, }; -static const struct mt9v032_model_data mt9v032_model_data[] = { - { - /* MT9V022, MT9V032 revisions 1/2/3 */ - .min_row_time = 660, - .min_hblank = MT9V032_HORIZONTAL_BLANKING_MIN, - .min_vblank = MT9V032_VERTICAL_BLANKING_MIN, - .max_vblank = MT9V032_VERTICAL_BLANKING_MAX, - .min_shutter = MT9V032_TOTAL_SHUTTER_WIDTH_MIN, - .max_shutter = MT9V032_TOTAL_SHUTTER_WIDTH_MAX, - .pclk_reg = MT9V032_PIXEL_CLOCK, - }, { - /* MT9V024, MT9V034 */ - .min_row_time = 690, - .min_hblank = MT9V034_HORIZONTAL_BLANKING_MIN, - .min_vblank = MT9V034_VERTICAL_BLANKING_MIN, - .max_vblank = MT9V034_VERTICAL_BLANKING_MAX, - .min_shutter = MT9V034_TOTAL_SHUTTER_WIDTH_MIN, - .max_shutter = MT9V034_TOTAL_SHUTTER_WIDTH_MAX, - .pclk_reg = MT9V034_PIXEL_CLOCK, - }, -}; - -static const struct mt9v032_model_info mt9v032_models[] = { - [MT9V032_MODEL_V022_COLOR] = { - .data = &mt9v032_model_data[0], - .color = true, - }, - [MT9V032_MODEL_V022_MONO] = { - .data = &mt9v032_model_data[0], - .color = false, - }, - [MT9V032_MODEL_V024_COLOR] = { - .data = &mt9v032_model_data[1], - .color = true, - }, - [MT9V032_MODEL_V024_MONO] = { - .data = &mt9v032_model_data[1], - .color = false, - }, - [MT9V032_MODEL_V032_COLOR] = { - .data = &mt9v032_model_data[0], - .color = true, - }, - [MT9V032_MODEL_V032_MONO] = { - .data = &mt9v032_model_data[0], - .color = false, - }, - [MT9V032_MODEL_V034_COLOR] = { - .data = &mt9v032_model_data[1], - .color = true, - }, - [MT9V032_MODEL_V034_MONO] = { - .data = &mt9v032_model_data[1], - .color = false, - }, -}; - struct mt9v032 { struct 
v4l2_subdev subdev; struct media_pad pad; @@ -349,7 +300,8 @@ static int mt9v032_power_on(struct mt9v032 *mt9v032) if (ret < 0) return ret; - return regmap_write(map, MT9V032_CHIP_CONTROL, 0); + return regmap_write(map, MT9V032_CHIP_CONTROL, + MT9V032_CHIP_CONTROL_MASTER_MODE); } static void mt9v032_power_off(struct mt9v032 *mt9v032) @@ -421,8 +373,7 @@ __mt9v032_get_pad_crop(struct mt9v032 *mt9v032, struct v4l2_subdev_pad_config *c static int mt9v032_s_stream(struct v4l2_subdev *subdev, int enable) { - const u16 mode = MT9V032_CHIP_CONTROL_MASTER_MODE - | MT9V032_CHIP_CONTROL_DOUT_ENABLE + const u16 mode = MT9V032_CHIP_CONTROL_DOUT_ENABLE | MT9V032_CHIP_CONTROL_SEQUENTIAL; struct mt9v032 *mt9v032 = to_mt9v032(subdev); struct v4l2_rect *crop = &mt9v032->crop; @@ -647,6 +598,34 @@ static int mt9v032_set_selection(struct v4l2_subdev *subdev, */ #define V4L2_CID_TEST_PATTERN_COLOR (V4L2_CID_USER_BASE | 0x1001) +/* + * Value between 1 and 64 to set the desired bin. This is effectively a measure + * of how bright the image is supposed to be. Both AGC and AEC try to reach + * this. + */ +#define V4L2_CID_AEGC_DESIRED_BIN (V4L2_CID_USER_BASE | 0x1002) +/* + * LPF is the low pass filter capability of the chip. Both AEC and AGC have + * this setting. This limits the speed in which AGC/AEC adjust their settings. + * Possible values are 0-2. 0 means no LPF. For 1 and 2 this equation is used: + * + * if |(calculated new exp - current exp)| > (current exp / 4) + * next exp = calculated new exp + * else + * next exp = current exp + ((calculated new exp - current exp) / 2^LPF) + */ +#define V4L2_CID_AEC_LPF (V4L2_CID_USER_BASE | 0x1003) +#define V4L2_CID_AGC_LPF (V4L2_CID_USER_BASE | 0x1004) +/* + * Value between 0 and 15. This is the number of frames being skipped before + * updating the auto exposure/gain. + */ +#define V4L2_CID_AEC_UPDATE_INTERVAL (V4L2_CID_USER_BASE | 0x1005) +#define V4L2_CID_AGC_UPDATE_INTERVAL (V4L2_CID_USER_BASE | 0x1006) +/* + * Maximum shutter width used for AEC. 
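/*
 * [Editor's sketch -- not part of the merged patch.]  The V4L2_CID_AEC_LPF
 * comment above gives the smoothing equation in prose; written out as a
 * function (hypothetical name; lpf is the 0..2 control value, and a lpf of
 * 0 divides by 2^0 = 1, i.e. no filtering, matching the comment):
 */
static int my_next_exposure(int cur, int calc, unsigned int lpf)
{
	if (abs(calc - cur) > cur / 4)
		return calc;			/* big change: jump directly */
	return cur + (calc - cur) / (1 << lpf);	/* small change: filter it */
}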
+ */ +#define V4L2_CID_AEC_MAX_SHUTTER_WIDTH (V4L2_CID_USER_BASE | 0x1007) static int mt9v032_s_ctrl(struct v4l2_ctrl *ctrl) { @@ -716,6 +695,28 @@ static int mt9v032_s_ctrl(struct v4l2_ctrl *ctrl) break; } return regmap_write(map, MT9V032_TEST_PATTERN, data); + + case V4L2_CID_AEGC_DESIRED_BIN: + return regmap_write(map, MT9V032_AEGC_DESIRED_BIN, ctrl->val); + + case V4L2_CID_AEC_LPF: + return regmap_write(map, MT9V032_AEC_LPF, ctrl->val); + + case V4L2_CID_AGC_LPF: + return regmap_write(map, MT9V032_AGC_LPF, ctrl->val); + + case V4L2_CID_AEC_UPDATE_INTERVAL: + return regmap_write(map, MT9V032_AEC_UPDATE_FREQUENCY, + ctrl->val); + + case V4L2_CID_AGC_UPDATE_INTERVAL: + return regmap_write(map, MT9V032_AGC_UPDATE_FREQUENCY, + ctrl->val); + + case V4L2_CID_AEC_MAX_SHUTTER_WIDTH: + return regmap_write(map, + mt9v032->model->data->aec_max_shutter_reg, + ctrl->val); } return 0; @@ -745,6 +746,84 @@ static const struct v4l2_ctrl_config mt9v032_test_pattern_color = { .flags = 0, }; +static const struct v4l2_ctrl_config mt9v032_aegc_controls[] = { + { + .ops = &mt9v032_ctrl_ops, + .id = V4L2_CID_AEGC_DESIRED_BIN, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "AEC/AGC Desired Bin", + .min = 1, + .max = 64, + .step = 1, + .def = 58, + .flags = 0, + }, { + .ops = &mt9v032_ctrl_ops, + .id = V4L2_CID_AEC_LPF, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "AEC Low Pass Filter", + .min = 0, + .max = 2, + .step = 1, + .def = 0, + .flags = 0, + }, { + .ops = &mt9v032_ctrl_ops, + .id = V4L2_CID_AGC_LPF, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "AGC Low Pass Filter", + .min = 0, + .max = 2, + .step = 1, + .def = 2, + .flags = 0, + }, { + .ops = &mt9v032_ctrl_ops, + .id = V4L2_CID_AEC_UPDATE_INTERVAL, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "AEC Update Interval", + .min = 0, + .max = 16, + .step = 1, + .def = 2, + .flags = 0, + }, { + .ops = &mt9v032_ctrl_ops, + .id = V4L2_CID_AGC_UPDATE_INTERVAL, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "AGC Update Interval", + .min = 0, + .max = 16, + .step = 1, + .def = 2, + .flags = 0, + } +}; + +static const struct v4l2_ctrl_config mt9v032_aec_max_shutter_width = { + .ops = &mt9v032_ctrl_ops, + .id = V4L2_CID_AEC_MAX_SHUTTER_WIDTH, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "AEC Max Shutter Width", + .min = 1, + .max = 2047, + .step = 1, + .def = 480, + .flags = 0, +}; + +static const struct v4l2_ctrl_config mt9v034_aec_max_shutter_width = { + .ops = &mt9v032_ctrl_ops, + .id = V4L2_CID_AEC_MAX_SHUTTER_WIDTH, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "AEC Max Shutter Width", + .min = 1, + .max = 32765, + .step = 1, + .def = 480, + .flags = 0, +}; + /* ----------------------------------------------------------------------------- * V4L2 subdev core operations */ @@ -953,13 +1032,6 @@ static int mt9v032_probe(struct i2c_client *client, unsigned int i; int ret; - if (!i2c_check_functionality(client->adapter, - I2C_FUNC_SMBUS_WORD_DATA)) { - dev_warn(&client->adapter->dev, - "I2C-Adapter doesn't support I2C_FUNC_SMBUS_WORD\n"); - return -EIO; - } - mt9v032 = devm_kzalloc(&client->dev, sizeof(*mt9v032), GFP_KERNEL); if (!mt9v032) return -ENOMEM; @@ -986,7 +1058,8 @@ static int mt9v032_probe(struct i2c_client *client, mt9v032->pdata = pdata; mt9v032->model = (const void *)did->driver_data; - v4l2_ctrl_handler_init(&mt9v032->ctrls, 10); + v4l2_ctrl_handler_init(&mt9v032->ctrls, 11 + + ARRAY_SIZE(mt9v032_aegc_controls)); v4l2_ctrl_new_std(&mt9v032->ctrls, &mt9v032_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); @@ -1015,6 +1088,13 @@ static int mt9v032_probe(struct i2c_client 
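/*
 * [Editor's sketch -- not part of the merged patch.]  Custom controls such
 * as the AEC/AGC set above are attached with v4l2_ctrl_new_custom(); any
 * creation error is latched in the handler and checked once at the end.
 * Sketch of that registration step (hypothetical function name):
 */
#include <media/v4l2-ctrls.h>

static int my_register_aegc_ctrls(struct v4l2_ctrl_handler *hdl)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(mt9v032_aegc_controls); ++i)
		v4l2_ctrl_new_custom(hdl, &mt9v032_aegc_controls[i], NULL);

	return hdl->error;	/* 0, or the first creation error */
}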
*client, mt9v032->test_pattern_color = v4l2_ctrl_new_custom(&mt9v032->ctrls, &mt9v032_test_pattern_color, NULL); + v4l2_ctrl_new_custom(&mt9v032->ctrls, + mt9v032->model->data->aec_max_shutter_v4l2_ctrl, + NULL); + for (i = 0; i < ARRAY_SIZE(mt9v032_aegc_controls); ++i) + v4l2_ctrl_new_custom(&mt9v032->ctrls, &mt9v032_aegc_controls[i], + NULL); + v4l2_ctrl_cluster(2, &mt9v032->test_pattern); mt9v032->pixel_rate = @@ -1103,6 +1183,67 @@ static int mt9v032_remove(struct i2c_client *client) return 0; } +static const struct mt9v032_model_data mt9v032_model_data[] = { + { + /* MT9V022, MT9V032 revisions 1/2/3 */ + .min_row_time = 660, + .min_hblank = MT9V032_HORIZONTAL_BLANKING_MIN, + .min_vblank = MT9V032_VERTICAL_BLANKING_MIN, + .max_vblank = MT9V032_VERTICAL_BLANKING_MAX, + .min_shutter = MT9V032_TOTAL_SHUTTER_WIDTH_MIN, + .max_shutter = MT9V032_TOTAL_SHUTTER_WIDTH_MAX, + .pclk_reg = MT9V032_PIXEL_CLOCK, + .aec_max_shutter_reg = MT9V032_AEC_MAX_SHUTTER_WIDTH, + .aec_max_shutter_v4l2_ctrl = &mt9v032_aec_max_shutter_width, + }, { + /* MT9V024, MT9V034 */ + .min_row_time = 690, + .min_hblank = MT9V034_HORIZONTAL_BLANKING_MIN, + .min_vblank = MT9V034_VERTICAL_BLANKING_MIN, + .max_vblank = MT9V034_VERTICAL_BLANKING_MAX, + .min_shutter = MT9V034_TOTAL_SHUTTER_WIDTH_MIN, + .max_shutter = MT9V034_TOTAL_SHUTTER_WIDTH_MAX, + .pclk_reg = MT9V034_PIXEL_CLOCK, + .aec_max_shutter_reg = MT9V034_AEC_MAX_SHUTTER_WIDTH, + .aec_max_shutter_v4l2_ctrl = &mt9v034_aec_max_shutter_width, + }, +}; + +static const struct mt9v032_model_info mt9v032_models[] = { + [MT9V032_MODEL_V022_COLOR] = { + .data = &mt9v032_model_data[0], + .color = true, + }, + [MT9V032_MODEL_V022_MONO] = { + .data = &mt9v032_model_data[0], + .color = false, + }, + [MT9V032_MODEL_V024_COLOR] = { + .data = &mt9v032_model_data[1], + .color = true, + }, + [MT9V032_MODEL_V024_MONO] = { + .data = &mt9v032_model_data[1], + .color = false, + }, + [MT9V032_MODEL_V032_COLOR] = { + .data = &mt9v032_model_data[0], + .color = true, + }, + [MT9V032_MODEL_V032_MONO] = { + .data = &mt9v032_model_data[0], + .color = false, + }, + [MT9V032_MODEL_V034_COLOR] = { + .data = &mt9v032_model_data[1], + .color = true, + }, + [MT9V032_MODEL_V034_MONO] = { + .data = &mt9v032_model_data[1], + .color = false, + }, +}; + static const struct i2c_device_id mt9v032_id[] = { { "mt9v022", (kernel_ulong_t)&mt9v032_models[MT9V032_MODEL_V022_COLOR] }, { "mt9v022m", (kernel_ulong_t)&mt9v032_models[MT9V032_MODEL_V022_MONO] }, diff --git a/drivers/media/i2c/saa7115.c b/drivers/media/i2c/saa7115.c index bd3526bdd539..58062b41c923 100644 --- a/drivers/media/i2c/saa7115.c +++ b/drivers/media/i2c/saa7115.c @@ -1585,13 +1585,6 @@ static const struct v4l2_ctrl_ops saa711x_ctrl_ops = { static const struct v4l2_subdev_core_ops saa711x_core_ops = { .log_status = saa711x_log_status, - .g_ext_ctrls = v4l2_subdev_g_ext_ctrls, - .try_ext_ctrls = v4l2_subdev_try_ext_ctrls, - .s_ext_ctrls = v4l2_subdev_s_ext_ctrls, - .g_ctrl = v4l2_subdev_g_ctrl, - .s_ctrl = v4l2_subdev_s_ctrl, - .queryctrl = v4l2_subdev_queryctrl, - .querymenu = v4l2_subdev_querymenu, .reset = saa711x_reset, .s_gpio = saa711x_s_gpio, #ifdef CONFIG_VIDEO_ADV_DEBUG diff --git a/drivers/media/i2c/smiapp/smiapp-core.c b/drivers/media/i2c/smiapp/smiapp-core.c index 3dfe387abf6e..d08ab6c8357c 100644 --- a/drivers/media/i2c/smiapp/smiapp-core.c +++ b/drivers/media/i2c/smiapp/smiapp-core.c @@ -3044,10 +3044,8 @@ static struct smiapp_platform_data *smiapp_get_pdata(struct device *dev) pdata->op_sys_clock = devm_kcalloc( dev, 
bus_cfg->nr_of_link_frequencies + 1 /* guardian */, sizeof(*pdata->op_sys_clock), GFP_KERNEL); - if (!pdata->op_sys_clock) { - rval = -ENOMEM; + if (!pdata->op_sys_clock) goto out_err; - } for (i = 0; i < bus_cfg->nr_of_link_frequencies; i++) { pdata->op_sys_clock[i] = bus_cfg->link_frequencies[i]; diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c index 6cf6d06737a5..1e3a0dd2238c 100644 --- a/drivers/media/i2c/tc358743.c +++ b/drivers/media/i2c/tc358743.c @@ -89,8 +89,6 @@ struct tc358743_state { struct v4l2_ctrl *audio_sampling_rate_ctrl; struct v4l2_ctrl *audio_present_ctrl; - /* work queues */ - struct workqueue_struct *work_queues; struct delayed_work delayed_work_enable_hotplug; /* edid */ @@ -425,8 +423,7 @@ static void tc358743_enable_edid(struct v4l2_subdev *sd) /* Enable hotplug after 100 ms. DDC access to EDID is also enabled when * hotplug is enabled. See register DDC_CTL */ - queue_delayed_work(state->work_queues, - &state->delayed_work_enable_hotplug, HZ / 10); + schedule_delayed_work(&state->delayed_work_enable_hotplug, HZ / 10); tc358743_enable_interrupts(sd, true); tc358743_s_ctrl_detect_tx_5v(sd); @@ -1884,14 +1881,6 @@ static int tc358743_probe(struct i2c_client *client, goto err_hdl; } - /* work queues */ - state->work_queues = create_singlethread_workqueue(client->name); - if (!state->work_queues) { - v4l2_err(sd, "Could not create work queue\n"); - err = -ENOMEM; - goto err_hdl; - } - state->pad.flags = MEDIA_PAD_FL_SOURCE; err = media_entity_pads_init(&sd->entity, 1, &state->pad); if (err < 0) @@ -1940,7 +1929,6 @@ static int tc358743_probe(struct i2c_client *client, err_work_queues: cancel_delayed_work(&state->delayed_work_enable_hotplug); - destroy_workqueue(state->work_queues); mutex_destroy(&state->confctl_mutex); err_hdl: media_entity_cleanup(&sd->entity); @@ -1954,7 +1942,6 @@ static int tc358743_remove(struct i2c_client *client) struct tc358743_state *state = to_state(sd); cancel_delayed_work(&state->delayed_work_enable_hotplug); - destroy_workqueue(state->work_queues); v4l2_async_unregister_subdev(sd); v4l2_device_unregister_subdev(sd); mutex_destroy(&state->confctl_mutex); diff --git a/drivers/media/i2c/tvaudio.c b/drivers/media/i2c/tvaudio.c index fece2a4339a1..42d1e26e581c 100644 --- a/drivers/media/i2c/tvaudio.c +++ b/drivers/media/i2c/tvaudio.c @@ -1855,13 +1855,6 @@ static const struct v4l2_ctrl_ops tvaudio_ctrl_ops = { static const struct v4l2_subdev_core_ops tvaudio_core_ops = { .log_status = tvaudio_log_status, - .g_ext_ctrls = v4l2_subdev_g_ext_ctrls, - .try_ext_ctrls = v4l2_subdev_try_ext_ctrls, - .s_ext_ctrls = v4l2_subdev_s_ext_ctrls, - .g_ctrl = v4l2_subdev_g_ctrl, - .s_ctrl = v4l2_subdev_s_ctrl, - .queryctrl = v4l2_subdev_queryctrl, - .querymenu = v4l2_subdev_querymenu, }; static const struct v4l2_subdev_tuner_ops tvaudio_tuner_ops = { diff --git a/drivers/media/i2c/wm8775.c b/drivers/media/i2c/wm8775.c index 6e00f145b948..5581f4db02af 100644 --- a/drivers/media/i2c/wm8775.c +++ b/drivers/media/i2c/wm8775.c @@ -178,13 +178,6 @@ static const struct v4l2_ctrl_ops wm8775_ctrl_ops = { static const struct v4l2_subdev_core_ops wm8775_core_ops = { .log_status = wm8775_log_status, - .g_ext_ctrls = v4l2_subdev_g_ext_ctrls, - .try_ext_ctrls = v4l2_subdev_try_ext_ctrls, - .s_ext_ctrls = v4l2_subdev_s_ext_ctrls, - .g_ctrl = v4l2_subdev_g_ctrl, - .s_ctrl = v4l2_subdev_s_ctrl, - .queryctrl = v4l2_subdev_queryctrl, - .querymenu = v4l2_subdev_querymenu, }; static const struct v4l2_subdev_tuner_ops wm8775_tuner_ops = { diff --git 
a/drivers/media/media-device.c b/drivers/media/media-device.c index a1cd50f331f1..1795abeda658 100644 --- a/drivers/media/media-device.c +++ b/drivers/media/media-device.c @@ -423,7 +423,7 @@ static long media_device_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct media_devnode *devnode = media_devnode_data(filp); - struct media_device *dev = to_media_device(devnode); + struct media_device *dev = devnode->media_dev; long ret; mutex_lock(&dev->graph_mutex); @@ -495,7 +495,7 @@ static long media_device_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct media_devnode *devnode = media_devnode_data(filp); - struct media_device *dev = to_media_device(devnode); + struct media_device *dev = devnode->media_dev; long ret; switch (cmd) { @@ -531,7 +531,8 @@ static const struct media_file_operations media_device_fops = { static ssize_t show_model(struct device *cd, struct device_attribute *attr, char *buf) { - struct media_device *mdev = to_media_device(to_media_devnode(cd)); + struct media_devnode *devnode = to_media_devnode(cd); + struct media_device *mdev = devnode->media_dev; return sprintf(buf, "%.*s\n", (int)sizeof(mdev->model), mdev->model); } @@ -704,23 +705,35 @@ EXPORT_SYMBOL_GPL(media_device_cleanup); int __must_check __media_device_register(struct media_device *mdev, struct module *owner) { + struct media_devnode *devnode; int ret; + devnode = kzalloc(sizeof(*devnode), GFP_KERNEL); + if (!devnode) + return -ENOMEM; + /* Register the device node. */ - mdev->devnode.fops = &media_device_fops; - mdev->devnode.parent = mdev->dev; - mdev->devnode.release = media_device_release; + mdev->devnode = devnode; + devnode->fops = &media_device_fops; + devnode->parent = mdev->dev; + devnode->release = media_device_release; /* Set version 0 to indicate user-space that the graph is static */ mdev->topology_version = 0; - ret = media_devnode_register(&mdev->devnode, owner); - if (ret < 0) + ret = media_devnode_register(mdev, devnode, owner); + if (ret < 0) { + /* devnode free is handled in media_devnode_*() */ + mdev->devnode = NULL; return ret; + } - ret = device_create_file(&mdev->devnode.dev, &dev_attr_model); + ret = device_create_file(&devnode->dev, &dev_attr_model); if (ret < 0) { - media_devnode_unregister(&mdev->devnode); + /* devnode free is handled in media_devnode_*() */ + mdev->devnode = NULL; + media_devnode_unregister_prepare(devnode); + media_devnode_unregister(devnode); return ret; } @@ -771,11 +784,14 @@ void media_device_unregister(struct media_device *mdev) mutex_lock(&mdev->graph_mutex); /* Check if mdev was ever registered at all */ - if (!media_devnode_is_registered(&mdev->devnode)) { + if (!media_devnode_is_registered(mdev->devnode)) { mutex_unlock(&mdev->graph_mutex); return; } + /* Clear the devnode register bit to avoid races with media dev open */ + media_devnode_unregister_prepare(mdev->devnode); + /* Remove all entities from the media device */ list_for_each_entry_safe(entity, next, &mdev->entities, graph_obj.list) __media_device_unregister_entity(entity); @@ -794,9 +810,12 @@ void media_device_unregister(struct media_device *mdev) mutex_unlock(&mdev->graph_mutex); - device_remove_file(&mdev->devnode.dev, &dev_attr_model); - dev_dbg(mdev->dev, "Media device unregistering\n"); - media_devnode_unregister(&mdev->devnode); + dev_dbg(mdev->dev, "Media device unregistered\n"); + + device_remove_file(&mdev->devnode->dev, &dev_attr_model); + media_devnode_unregister(mdev->devnode); + /* devnode free is handled in media_devnode_*() */ + 
mdev->devnode = NULL; } EXPORT_SYMBOL_GPL(media_device_unregister); diff --git a/drivers/media/media-devnode.c b/drivers/media/media-devnode.c index b66dc9d0766b..f2772ba6f611 100644 --- a/drivers/media/media-devnode.c +++ b/drivers/media/media-devnode.c @@ -44,6 +44,7 @@ #include #include +#include #define MEDIA_NUM_DEVICES 256 #define MEDIA_NAME "media" @@ -59,21 +60,19 @@ static DECLARE_BITMAP(media_devnode_nums, MEDIA_NUM_DEVICES); /* Called when the last user of the media device exits. */ static void media_devnode_release(struct device *cd) { - struct media_devnode *mdev = to_media_devnode(cd); + struct media_devnode *devnode = to_media_devnode(cd); mutex_lock(&media_devnode_lock); - - /* Delete the cdev on this minor as well */ - cdev_del(&mdev->cdev); - /* Mark device node number as free */ - clear_bit(mdev->minor, media_devnode_nums); - + clear_bit(devnode->minor, media_devnode_nums); mutex_unlock(&media_devnode_lock); /* Release media_devnode and perform other cleanups as needed. */ - if (mdev->release) - mdev->release(mdev); + if (devnode->release) + devnode->release(devnode); + + kfree(devnode); + pr_debug("%s: Media Devnode Deallocated\n", __func__); } static struct bus_type media_bus_type = { @@ -83,37 +82,37 @@ static struct bus_type media_bus_type = { static ssize_t media_read(struct file *filp, char __user *buf, size_t sz, loff_t *off) { - struct media_devnode *mdev = media_devnode_data(filp); + struct media_devnode *devnode = media_devnode_data(filp); - if (!mdev->fops->read) + if (!devnode->fops->read) return -EINVAL; - if (!media_devnode_is_registered(mdev)) + if (!media_devnode_is_registered(devnode)) return -EIO; - return mdev->fops->read(filp, buf, sz, off); + return devnode->fops->read(filp, buf, sz, off); } static ssize_t media_write(struct file *filp, const char __user *buf, size_t sz, loff_t *off) { - struct media_devnode *mdev = media_devnode_data(filp); + struct media_devnode *devnode = media_devnode_data(filp); - if (!mdev->fops->write) + if (!devnode->fops->write) return -EINVAL; - if (!media_devnode_is_registered(mdev)) + if (!media_devnode_is_registered(devnode)) return -EIO; - return mdev->fops->write(filp, buf, sz, off); + return devnode->fops->write(filp, buf, sz, off); } static unsigned int media_poll(struct file *filp, struct poll_table_struct *poll) { - struct media_devnode *mdev = media_devnode_data(filp); + struct media_devnode *devnode = media_devnode_data(filp); - if (!media_devnode_is_registered(mdev)) + if (!media_devnode_is_registered(devnode)) return POLLERR | POLLHUP; - if (!mdev->fops->poll) + if (!devnode->fops->poll) return DEFAULT_POLLMASK; - return mdev->fops->poll(filp, poll); + return devnode->fops->poll(filp, poll); } static long @@ -121,12 +120,12 @@ __media_ioctl(struct file *filp, unsigned int cmd, unsigned long arg, long (*ioctl_func)(struct file *filp, unsigned int cmd, unsigned long arg)) { - struct media_devnode *mdev = media_devnode_data(filp); + struct media_devnode *devnode = media_devnode_data(filp); if (!ioctl_func) return -ENOTTY; - if (!media_devnode_is_registered(mdev)) + if (!media_devnode_is_registered(devnode)) return -EIO; return ioctl_func(filp, cmd, arg); @@ -134,9 +133,9 @@ __media_ioctl(struct file *filp, unsigned int cmd, unsigned long arg, static long media_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct media_devnode *mdev = media_devnode_data(filp); + struct media_devnode *devnode = media_devnode_data(filp); - return __media_ioctl(filp, cmd, arg, mdev->fops->ioctl); + return 
__media_ioctl(filp, cmd, arg, devnode->fops->ioctl); } #ifdef CONFIG_COMPAT @@ -144,9 +143,9 @@ static long media_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) static long media_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct media_devnode *mdev = media_devnode_data(filp); + struct media_devnode *devnode = media_devnode_data(filp); - return __media_ioctl(filp, cmd, arg, mdev->fops->compat_ioctl); + return __media_ioctl(filp, cmd, arg, devnode->fops->compat_ioctl); } #endif /* CONFIG_COMPAT */ @@ -154,7 +153,7 @@ static long media_compat_ioctl(struct file *filp, unsigned int cmd, /* Override for the open function */ static int media_open(struct inode *inode, struct file *filp) { - struct media_devnode *mdev; + struct media_devnode *devnode; int ret; /* Check if the media device is available. This needs to be done with @@ -164,23 +163,23 @@ static int media_open(struct inode *inode, struct file *filp) * a crash. */ mutex_lock(&media_devnode_lock); - mdev = container_of(inode->i_cdev, struct media_devnode, cdev); + devnode = container_of(inode->i_cdev, struct media_devnode, cdev); /* return ENXIO if the media device has been removed already or if it is not registered anymore. */ - if (!media_devnode_is_registered(mdev)) { + if (!media_devnode_is_registered(devnode)) { mutex_unlock(&media_devnode_lock); return -ENXIO; } /* and increase the device refcount */ - get_device(&mdev->dev); + get_device(&devnode->dev); mutex_unlock(&media_devnode_lock); - filp->private_data = mdev; + filp->private_data = devnode; - if (mdev->fops->open) { - ret = mdev->fops->open(filp); + if (devnode->fops->open) { + ret = devnode->fops->open(filp); if (ret) { - put_device(&mdev->dev); + put_device(&devnode->dev); filp->private_data = NULL; return ret; } @@ -192,16 +191,18 @@ static int media_open(struct inode *inode, struct file *filp) /* Override for the release function */ static int media_release(struct inode *inode, struct file *filp) { - struct media_devnode *mdev = media_devnode_data(filp); + struct media_devnode *devnode = media_devnode_data(filp); - if (mdev->fops->release) - mdev->fops->release(filp); + if (devnode->fops->release) + devnode->fops->release(filp); filp->private_data = NULL; /* decrease the refcount unconditionally since the release() return value is ignored. 
*/ - put_device(&mdev->dev); + put_device(&devnode->dev); + + pr_debug("%s: Media Release\n", __func__); return 0; } @@ -219,7 +220,8 @@ static const struct file_operations media_devnode_fops = { .llseek = no_llseek, }; -int __must_check media_devnode_register(struct media_devnode *mdev, +int __must_check media_devnode_register(struct media_device *mdev, + struct media_devnode *devnode, struct module *owner) { int minor; @@ -231,61 +233,80 @@ int __must_check media_devnode_register(struct media_devnode *mdev, if (minor == MEDIA_NUM_DEVICES) { mutex_unlock(&media_devnode_lock); pr_err("could not get a free minor\n"); + kfree(devnode); return -ENFILE; } set_bit(minor, media_devnode_nums); mutex_unlock(&media_devnode_lock); - mdev->minor = minor; + devnode->minor = minor; + devnode->media_dev = mdev; + + /* Part 1: Initialize dev now to use dev.kobj for cdev.kobj.parent */ + devnode->dev.bus = &media_bus_type; + devnode->dev.devt = MKDEV(MAJOR(media_dev_t), devnode->minor); + devnode->dev.release = media_devnode_release; + if (devnode->parent) + devnode->dev.parent = devnode->parent; + dev_set_name(&devnode->dev, "media%d", devnode->minor); + device_initialize(&devnode->dev); /* Part 2: Initialize and register the character device */ - cdev_init(&mdev->cdev, &media_devnode_fops); - mdev->cdev.owner = owner; + cdev_init(&devnode->cdev, &media_devnode_fops); + devnode->cdev.owner = owner; + devnode->cdev.kobj.parent = &devnode->dev.kobj; - ret = cdev_add(&mdev->cdev, MKDEV(MAJOR(media_dev_t), mdev->minor), 1); + ret = cdev_add(&devnode->cdev, MKDEV(MAJOR(media_dev_t), devnode->minor), 1); if (ret < 0) { pr_err("%s: cdev_add failed\n", __func__); - goto error; + goto cdev_add_error; } - /* Part 3: Register the media device */ - mdev->dev.bus = &media_bus_type; - mdev->dev.devt = MKDEV(MAJOR(media_dev_t), mdev->minor); - mdev->dev.release = media_devnode_release; - if (mdev->parent) - mdev->dev.parent = mdev->parent; - dev_set_name(&mdev->dev, "media%d", mdev->minor); - ret = device_register(&mdev->dev); + /* Part 3: Add the media device */ + ret = device_add(&devnode->dev); if (ret < 0) { - pr_err("%s: device_register failed\n", __func__); - goto error; + pr_err("%s: device_add failed\n", __func__); + goto device_add_error; } /* Part 4: Activate this minor. The char device can now be used. 
*/ - set_bit(MEDIA_FLAG_REGISTERED, &mdev->flags); + set_bit(MEDIA_FLAG_REGISTERED, &devnode->flags); return 0; -error: +device_add_error: + cdev_del(&devnode->cdev); +cdev_add_error: mutex_lock(&media_devnode_lock); - cdev_del(&mdev->cdev); - clear_bit(mdev->minor, media_devnode_nums); + clear_bit(devnode->minor, media_devnode_nums); + devnode->media_dev = NULL; mutex_unlock(&media_devnode_lock); + put_device(&devnode->dev); return ret; } -void media_devnode_unregister(struct media_devnode *mdev) +void media_devnode_unregister_prepare(struct media_devnode *devnode) { - /* Check if mdev was ever registered at all */ - if (!media_devnode_is_registered(mdev)) + /* Check if devnode was ever registered at all */ + if (!media_devnode_is_registered(devnode)) return; mutex_lock(&media_devnode_lock); - clear_bit(MEDIA_FLAG_REGISTERED, &mdev->flags); + clear_bit(MEDIA_FLAG_REGISTERED, &devnode->flags); + mutex_unlock(&media_devnode_lock); +} + +void media_devnode_unregister(struct media_devnode *devnode) +{ + mutex_lock(&media_devnode_lock); + /* Delete the cdev on this minor as well */ + cdev_del(&devnode->cdev); mutex_unlock(&media_devnode_lock); - device_unregister(&mdev->dev); + device_del(&devnode->dev); + devnode->media_dev = NULL; + put_device(&devnode->dev); } /* diff --git a/drivers/media/pci/bt8xx/dst_ca.c b/drivers/media/pci/bt8xx/dst_ca.c index da8b414fd824..8681b9143a35 100644 --- a/drivers/media/pci/bt8xx/dst_ca.c +++ b/drivers/media/pci/bt8xx/dst_ca.c @@ -655,7 +655,6 @@ static long dst_ca_ioctl(struct file *file, unsigned int cmd, unsigned long ioct static int dst_ca_open(struct inode *inode, struct file *file) { dprintk(verbose, DST_CA_DEBUG, 1, " Device opened [%p] ", file); - try_module_get(THIS_MODULE); return 0; } @@ -663,7 +662,6 @@ static int dst_ca_open(struct inode *inode, struct file *file) static int dst_ca_release(struct inode *inode, struct file *file) { dprintk(verbose, DST_CA_DEBUG, 1, " Device closed."); - module_put(THIS_MODULE); return 0; } diff --git a/drivers/media/pci/cobalt/cobalt-driver.c b/drivers/media/pci/cobalt/cobalt-driver.c index 8d6f04fc8013..476f7f0dcf81 100644 --- a/drivers/media/pci/cobalt/cobalt-driver.c +++ b/drivers/media/pci/cobalt/cobalt-driver.c @@ -492,7 +492,6 @@ static int cobalt_subdevs_init(struct cobalt *cobalt) .ain_sel = ADV7604_AIN7_8_9_NC_SYNC_3_1, .bus_order = ADV7604_BUS_ORDER_BRG, .blank_data = 1, - .op_656_range = 1, .op_format_mode_sel = ADV7604_OP_FORMAT_MODE0, .int1_config = ADV76XX_INT1_CONFIG_ACTIVE_HIGH, .dr_str_data = ADV76XX_DR_STR_HIGH, @@ -571,7 +570,6 @@ static int cobalt_subdevs_hsma_init(struct cobalt *cobalt) .bus_order = ADV7842_BUS_ORDER_RBG, .op_format_mode_sel = ADV7842_OP_FORMAT_MODE0, .blank_data = 1, - .op_656_range = 1, .dr_str_data = 3, .dr_str_clk = 3, .dr_str_sync = 3, @@ -691,17 +689,10 @@ static int cobalt_probe(struct pci_dev *pci_dev, cobalt->pci_dev = pci_dev; cobalt->instance = i; - cobalt->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev); - if (IS_ERR(cobalt->alloc_ctx)) { - kfree(cobalt); - return -ENOMEM; - } - retval = v4l2_device_register(&pci_dev->dev, &cobalt->v4l2_dev); if (retval) { pr_err("cobalt: v4l2_device_register of card %d failed\n", cobalt->instance); - vb2_dma_sg_cleanup_ctx(cobalt->alloc_ctx); kfree(cobalt); return retval; } @@ -782,7 +773,6 @@ err: cobalt_err("error %d on initialization\n", retval); v4l2_device_unregister(&cobalt->v4l2_dev); - vb2_dma_sg_cleanup_ctx(cobalt->alloc_ctx); kfree(cobalt); return retval; } @@ -818,7 +808,6 @@ static void cobalt_remove(struct pci_dev 
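/*
 * [Editor's sketch -- not part of the merged patch.]  The media-devnode
 * rework above replaces device_register() with the split
 * device_initialize()/device_add() sequence so the cdev can hang off the
 * device kobject before the node becomes visible to user space.  Generic
 * shape of the pattern (my_node is hypothetical; dev.release must be set
 * by the caller so the final put_device() can free the structure):
 */
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/fs.h>

struct my_node {
	struct device dev;
	struct cdev cdev;
};

static int my_node_register(struct my_node *n, dev_t devt,
			    const struct file_operations *fops)
{
	int ret;

	device_initialize(&n->dev);		/* refcounted from here on */
	n->dev.devt = devt;
	dev_set_name(&n->dev, "media%d", MINOR(devt));

	cdev_init(&n->cdev, fops);
	n->cdev.kobj.parent = &n->dev.kobj;	/* cdev pins the device */

	ret = cdev_add(&n->cdev, devt, 1);
	if (ret)
		goto err_put;

	ret = device_add(&n->dev);
	if (ret)
		goto err_cdev;
	return 0;

err_cdev:
	cdev_del(&n->cdev);
err_put:
	put_device(&n->dev);			/* frees via dev.release */
	return ret;
}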
*pci_dev) cobalt_info("removed cobalt card\n"); v4l2_device_unregister(v4l2_dev); - vb2_dma_sg_cleanup_ctx(cobalt->alloc_ctx); kfree(cobalt); } diff --git a/drivers/media/pci/cobalt/cobalt-driver.h b/drivers/media/pci/cobalt/cobalt-driver.h index b2f08e4a68bf..ed00dc9d9399 100644 --- a/drivers/media/pci/cobalt/cobalt-driver.h +++ b/drivers/media/pci/cobalt/cobalt-driver.h @@ -262,7 +262,6 @@ struct cobalt { int instance; struct pci_dev *pci_dev; struct v4l2_device v4l2_dev; - void *alloc_ctx; void __iomem *bar0, *bar1; diff --git a/drivers/media/pci/cobalt/cobalt-v4l2.c b/drivers/media/pci/cobalt/cobalt-v4l2.c index c0ba458f6cf3..d05672fe9ff9 100644 --- a/drivers/media/pci/cobalt/cobalt-v4l2.c +++ b/drivers/media/pci/cobalt/cobalt-v4l2.c @@ -45,7 +45,7 @@ static const struct v4l2_dv_timings cea1080p60 = V4L2_DV_BT_CEA_1920X1080P60; static int cobalt_queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct cobalt_stream *s = q->drv_priv; unsigned size = s->stride * s->height; @@ -54,7 +54,6 @@ static int cobalt_queue_setup(struct vb2_queue *q, *num_buffers = 3; if (*num_buffers > NR_BUFS) *num_buffers = NR_BUFS; - alloc_ctxs[0] = s->cobalt->alloc_ctx; if (*num_planes) return sizes[0] < size ? -EINVAL : 0; *num_planes = 1; @@ -1224,6 +1223,7 @@ static int cobalt_node_register(struct cobalt *cobalt, int node) q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->min_buffers_needed = 2; q->lock = &s->lock; + q->dev = &cobalt->pci_dev->dev; vdev->queue = q; video_set_drvdata(vdev, s); diff --git a/drivers/media/pci/cx18/cx18-alsa-mixer.c b/drivers/media/pci/cx18/cx18-alsa-mixer.c index 341bddc00b77..284275270f1b 100644 --- a/drivers/media/pci/cx18/cx18-alsa-mixer.c +++ b/drivers/media/pci/cx18/cx18-alsa-mixer.c @@ -93,7 +93,7 @@ static int snd_cx18_mixer_tv_vol_get(struct snd_kcontrol *kctl, vctrl.value = dB_to_cx18_av_vol(uctl->value.integer.value[0]); snd_cx18_lock(cxsc); - ret = v4l2_subdev_call(cx->sd_av, core, g_ctrl, &vctrl); + ret = v4l2_g_ctrl(cx->sd_av->ctrl_handler, &vctrl); snd_cx18_unlock(cxsc); if (!ret) @@ -115,14 +115,14 @@ static int snd_cx18_mixer_tv_vol_put(struct snd_kcontrol *kctl, snd_cx18_lock(cxsc); /* Fetch current state */ - ret = v4l2_subdev_call(cx->sd_av, core, g_ctrl, &vctrl); + ret = v4l2_g_ctrl(cx->sd_av->ctrl_handler, &vctrl); if (ret || (cx18_av_vol_to_dB(vctrl.value) != uctl->value.integer.value[0])) { /* Set, if needed */ vctrl.value = dB_to_cx18_av_vol(uctl->value.integer.value[0]); - ret = v4l2_subdev_call(cx->sd_av, core, s_ctrl, &vctrl); + ret = v4l2_s_ctrl(cx->sd_av->ctrl_handler, &vctrl); if (!ret) ret = 1; /* Indicate control was changed w/o error */ } diff --git a/drivers/media/pci/cx18/cx18-driver.c b/drivers/media/pci/cx18/cx18-driver.c index 260e462d91b4..2f23b26b16c0 100644 --- a/drivers/media/pci/cx18/cx18-driver.c +++ b/drivers/media/pci/cx18/cx18-driver.c @@ -560,7 +560,7 @@ static void cx18_process_options(struct cx18 *cx) cx->stream_buf_size[CX18_ENC_STREAM_TYPE_MPG] = enc_mpg_bufsize; cx->stream_buf_size[CX18_ENC_STREAM_TYPE_IDX] = enc_idx_bufsize; cx->stream_buf_size[CX18_ENC_STREAM_TYPE_YUV] = enc_yuv_bufsize; - cx->stream_buf_size[CX18_ENC_STREAM_TYPE_VBI] = vbi_active_samples * 36; + cx->stream_buf_size[CX18_ENC_STREAM_TYPE_VBI] = VBI_ACTIVE_SAMPLES * 36; cx->stream_buf_size[CX18_ENC_STREAM_TYPE_PCM] = enc_pcm_bufsize; cx->stream_buf_size[CX18_ENC_STREAM_TYPE_RAD] = 0; /* control no data */ diff --git 
a/drivers/media/pci/cx18/cx18-driver.h b/drivers/media/pci/cx18/cx18-driver.h index 47ce80fa73b9..ef308a10e870 100644 --- a/drivers/media/pci/cx18/cx18-driver.h +++ b/drivers/media/pci/cx18/cx18-driver.h @@ -492,9 +492,9 @@ struct cx18_card; * (1/15.625 kHz) * 2 * 13.5 MHz = 1728 samples/line = * 4 bytes SAV + 280 bytes anc data + 4 bytes SAV + 1440 active samples */ -static const u32 vbi_active_samples = 1444; /* 4 byte SAV + 720 Y + 720 U/V */ -static const u32 vbi_hblank_samples_60Hz = 272; /* 4 byte EAV + 268 anc/fill */ -static const u32 vbi_hblank_samples_50Hz = 284; /* 4 byte EAV + 280 anc/fill */ +#define VBI_ACTIVE_SAMPLES 1444 /* 4 byte SAV + 720 Y + 720 U/V */ +#define VBI_HBLANK_SAMPLES_60HZ 272 /* 4 byte EAV + 268 anc/fill */ +#define VBI_HBLANK_SAMPLES_50HZ 284 /* 4 byte EAV + 280 anc/fill */ #define CX18_VBI_FRAMES 32 diff --git a/drivers/media/pci/cx18/cx18-ioctl.c b/drivers/media/pci/cx18/cx18-ioctl.c index eeb741c7db1b..fecca2a63891 100644 --- a/drivers/media/pci/cx18/cx18-ioctl.c +++ b/drivers/media/pci/cx18/cx18-ioctl.c @@ -177,7 +177,7 @@ static int cx18_g_fmt_vbi_cap(struct file *file, void *fh, vbifmt->sampling_rate = 27000000; vbifmt->offset = 248; /* FIXME - slightly wrong for both 50 & 60 Hz */ - vbifmt->samples_per_line = vbi_active_samples - 4; + vbifmt->samples_per_line = VBI_ACTIVE_SAMPLES - 4; vbifmt->sample_format = V4L2_PIX_FMT_GREY; vbifmt->start[0] = cx->vbi.start[0]; vbifmt->start[1] = cx->vbi.start[1]; diff --git a/drivers/media/pci/cx18/cx18-streams.c b/drivers/media/pci/cx18/cx18-streams.c index c9860845264f..f3802ec1b383 100644 --- a/drivers/media/pci/cx18/cx18-streams.c +++ b/drivers/media/pci/cx18/cx18-streams.c @@ -605,9 +605,9 @@ static void cx18_vbi_setup(struct cx18_stream *s) /* Lines per field */ data[1] = (lines / 2) | ((lines / 2) << 16); /* bytes per line */ - data[2] = (raw ? vbi_active_samples - : (cx->is_60hz ? vbi_hblank_samples_60Hz - : vbi_hblank_samples_50Hz)); + data[2] = (raw ? VBI_ACTIVE_SAMPLES + : (cx->is_60hz ? VBI_HBLANK_SAMPLES_60HZ + : VBI_HBLANK_SAMPLES_50HZ)); /* Every X number of frames a VBI interrupt arrives (frames as in 25 or 30 fps) */ data[3] = 1; @@ -761,7 +761,7 @@ static void cx18_stream_configure_mdls(struct cx18_stream *s) s->bufs_per_mdl = 1; if (cx18_raw_vbi(s->cx)) { s->mdl_size = (s->cx->is_60hz ? 12 : 18) - * 2 * vbi_active_samples; + * 2 * VBI_ACTIVE_SAMPLES; } else { /* * See comment in cx18_vbi_setup() below about the @@ -769,8 +769,8 @@ static void cx18_stream_configure_mdls(struct cx18_stream *s) * the lines on which EAV RP codes toggle. */ s->mdl_size = s->cx->is_60hz - ? (21 - 4 + 1) * 2 * vbi_hblank_samples_60Hz - : (23 - 2 + 1) * 2 * vbi_hblank_samples_50Hz; + ? (21 - 4 + 1) * 2 * VBI_HBLANK_SAMPLES_60HZ + : (23 - 2 + 1) * 2 * VBI_HBLANK_SAMPLES_50HZ; } break; default: diff --git a/drivers/media/pci/cx18/cx18-vbi.c b/drivers/media/pci/cx18/cx18-vbi.c index add99642f1e2..43360cbcf24b 100644 --- a/drivers/media/pci/cx18/cx18-vbi.c +++ b/drivers/media/pci/cx18/cx18-vbi.c @@ -108,7 +108,7 @@ static void copy_vbi_data(struct cx18 *cx, int lines, u32 pts_stamp) /* FIXME - this function ignores the input size. */ static u32 compress_raw_buf(struct cx18 *cx, u8 *buf, u32 size, u32 hdr_size) { - u32 line_size = vbi_active_samples; + u32 line_size = VBI_ACTIVE_SAMPLES; u32 lines = cx->vbi.count * 2; u8 *q = buf; u8 *p; @@ -145,8 +145,8 @@ static u32 compress_sliced_buf(struct cx18 *cx, u8 *buf, u32 size, struct v4l2_decode_vbi_line vbi; int i; u32 line = 0; - u32 line_size = cx->is_60hz ? 
vbi_hblank_samples_60Hz - : vbi_hblank_samples_50Hz; + u32 line_size = cx->is_60hz ? VBI_HBLANK_SAMPLES_60HZ + : VBI_HBLANK_SAMPLES_50HZ; /* find the first valid line */ for (i = hdr_size, buf += hdr_size; i < size; i++, buf++) { diff --git a/drivers/media/pci/cx23885/cx23885-417.c b/drivers/media/pci/cx23885/cx23885-417.c index bd333875a1f7..efec2d1a7afd 100644 --- a/drivers/media/pci/cx23885/cx23885-417.c +++ b/drivers/media/pci/cx23885/cx23885-417.c @@ -1140,7 +1140,7 @@ static int cx23885_initialize_codec(struct cx23885_dev *dev, int startencoder) static int queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct cx23885_dev *dev = q->drv_priv; @@ -1148,7 +1148,6 @@ static int queue_setup(struct vb2_queue *q, dev->ts1.ts_packet_count = mpeglines; *num_planes = 1; sizes[0] = mpeglinesize * mpeglines; - alloc_ctxs[0] = dev->alloc_ctx; *num_buffers = mpegbufs; return 0; } diff --git a/drivers/media/pci/cx23885/cx23885-cards.c b/drivers/media/pci/cx23885/cx23885-cards.c index 310ee769aed4..4abf50f2694f 100644 --- a/drivers/media/pci/cx23885/cx23885-cards.c +++ b/drivers/media/pci/cx23885/cx23885-cards.c @@ -765,6 +765,11 @@ struct cx23885_board cx23885_boards[] = { .amux = CX25840_AUDIO7, } }, }, + [CX23885_BOARD_HAUPPAUGE_QUADHD_DVB] = { + .name = "Hauppauge WinTV-QuadHD-DVB", + .portb = CX23885_MPEG_DVB, + .portc = CX23885_MPEG_DVB, + }, }; const unsigned int cx23885_bcount = ARRAY_SIZE(cx23885_boards); @@ -1060,6 +1065,14 @@ struct cx23885_subid cx23885_subids[] = { .subvendor = 0x1576, .subdevice = 0x0460, .card = CX23885_BOARD_VIEWCAST_460E, + }, { + .subvendor = 0x0070, + .subdevice = 0x6a28, + .card = CX23885_BOARD_HAUPPAUGE_QUADHD_DVB, /* Tuner Pair 1 */ + }, { + .subvendor = 0x0070, + .subdevice = 0x6b28, + .card = CX23885_BOARD_HAUPPAUGE_QUADHD_DVB, /* Tuner Pair 2 */ }, }; const unsigned int cx23885_idcount = ARRAY_SIZE(cx23885_subids); @@ -1257,6 +1270,14 @@ static void hauppauge_eeprom(struct cx23885_dev *dev, u8 *eeprom_data) case 150329: /* WinTV-HVR5525 (PCIe, DVB-S/S2, DVB-T/T2/C) */ break; + case 166100: + /* WinTV-QuadHD (DVB) Tuner Pair 1 (PCIe, IR, half height, + DVB-T/T2/C, DVB-T/T2/C */ + break; + case 166101: + /* WinTV-QuadHD (DVB) Tuner Pair 2 (PCIe, IR, half height, + DVB-T/T2/C, DVB-T/T2/C */ + break; default: printk(KERN_WARNING "%s: warning: " "unknown hauppauge model #%d\n", @@ -1729,20 +1750,22 @@ void cx23885_gpio_setup(struct cx23885_dev *dev) cx23885_gpio_set(dev, GPIO_2); break; case CX23885_BOARD_HAUPPAUGE_HVR5525: + case CX23885_BOARD_HAUPPAUGE_QUADHD_DVB: /* - * GPIO-00 IR_WIDE - * GPIO-02 wake# - * GPIO-03 VAUX Pres. - * GPIO-07 PROG# - * GPIO-08 SAT_RESN - * GPIO-09 TER_RESN - * GPIO-10 B2_SENSE - * GPIO-11 B1_SENSE - * GPIO-15 IR_LED_STATUS - * GPIO-19 IR_NARROW - * GPIO-20 Blauster1 - * ALTGPIO VAUX_SWITCH - * AUX_PLL_CLK : Blaster2 + * HVR5525 GPIO Details: + * GPIO-00 IR_WIDE + * GPIO-02 wake# + * GPIO-03 VAUX Pres. 
+ * GPIO-07 PROG# + * GPIO-08 SAT_RESN + * GPIO-09 TER_RESN + * GPIO-10 B2_SENSE + * GPIO-11 B1_SENSE + * GPIO-15 IR_LED_STATUS + * GPIO-19 IR_NARROW + * GPIO-20 Blauster1 + * ALTGPIO VAUX_SWITCH + * AUX_PLL_CLK : Blaster2 */ /* Put the parts into reset and back */ cx23885_gpio_enable(dev, GPIO_8 | GPIO_9, 1); @@ -1802,6 +1825,7 @@ int cx23885_ir_init(struct cx23885_dev *dev) case CX23885_BOARD_HAUPPAUGE_HVR1255: case CX23885_BOARD_HAUPPAUGE_HVR1255_22111: case CX23885_BOARD_HAUPPAUGE_HVR1210: + case CX23885_BOARD_HAUPPAUGE_QUADHD_DVB: /* FIXME: Implement me */ break; case CX23885_BOARD_HAUPPAUGE_HVR1270: @@ -2000,6 +2024,7 @@ void cx23885_card_setup(struct cx23885_dev *dev) case CX23885_BOARD_HAUPPAUGE_STARBURST: case CX23885_BOARD_HAUPPAUGE_IMPACTVCBE: case CX23885_BOARD_HAUPPAUGE_HVR5525: + case CX23885_BOARD_HAUPPAUGE_QUADHD_DVB: if (dev->i2c_bus[0].i2c_rc == 0) hauppauge_eeprom(dev, eeprom+0xc0); break; @@ -2145,6 +2170,14 @@ void cx23885_card_setup(struct cx23885_dev *dev) ts2->ts_clk_en_val = 0x1; /* Enable TS_CLK */ ts2->src_sel_val = CX23885_SRC_SEL_PARALLEL_MPEG_VIDEO; break; + case CX23885_BOARD_HAUPPAUGE_QUADHD_DVB: + ts1->gen_ctrl_val = 0xc; /* Serial bus + punctured clock */ + ts1->ts_clk_en_val = 0x1; /* Enable TS_CLK */ + ts1->src_sel_val = CX23885_SRC_SEL_PARALLEL_MPEG_VIDEO; + ts2->gen_ctrl_val = 0xc; /* Serial bus + punctured clock */ + ts2->ts_clk_en_val = 0x1; /* Enable TS_CLK */ + ts2->src_sel_val = CX23885_SRC_SEL_PARALLEL_MPEG_VIDEO; + break; case CX23885_BOARD_HAUPPAUGE_HVR1250: case CX23885_BOARD_HAUPPAUGE_HVR1500: case CX23885_BOARD_HAUPPAUGE_HVR1500Q: diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c index 813c217b5e1a..c86b1093ab99 100644 --- a/drivers/media/pci/cx23885/cx23885-core.c +++ b/drivers/media/pci/cx23885/cx23885-core.c @@ -2005,14 +2005,9 @@ static int cx23885_initdev(struct pci_dev *pci_dev, err = pci_set_dma_mask(pci_dev, 0xffffffff); if (err) { printk("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name); - goto fail_context; + goto fail_ctrl; } - dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev); - if (IS_ERR(dev->alloc_ctx)) { - err = PTR_ERR(dev->alloc_ctx); - goto fail_context; - } err = request_irq(pci_dev->irq, cx23885_irq, IRQF_SHARED, dev->name, dev); if (err < 0) { @@ -2041,8 +2036,6 @@ static int cx23885_initdev(struct pci_dev *pci_dev, return 0; fail_irq: - vb2_dma_sg_cleanup_ctx(dev->alloc_ctx); -fail_context: cx23885_dev_unregister(dev); fail_ctrl: v4l2_ctrl_handler_free(hdl); @@ -2068,7 +2061,6 @@ static void cx23885_finidev(struct pci_dev *pci_dev) pci_disable_device(pci_dev); cx23885_dev_unregister(dev); - vb2_dma_sg_cleanup_ctx(dev->alloc_ctx); v4l2_ctrl_handler_free(&dev->ctrl_handler); v4l2_device_unregister(v4l2_dev); kfree(dev); diff --git a/drivers/media/pci/cx23885/cx23885-dvb.c b/drivers/media/pci/cx23885/cx23885-dvb.c index f041b6931ba8..e5748a93c479 100644 --- a/drivers/media/pci/cx23885/cx23885-dvb.c +++ b/drivers/media/pci/cx23885/cx23885-dvb.c @@ -94,7 +94,7 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); static int queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct cx23885_tsport *port = q->drv_priv; @@ -102,7 +102,6 @@ static int queue_setup(struct vb2_queue *q, port->ts_packet_count = 32; *num_planes = 1; sizes[0] = port->ts_packet_size * port->ts_packet_count; - alloc_ctxs[0] = port->dev->alloc_ctx; *num_buffers = 32; return 0; } @@ 
-2269,9 +2268,107 @@ static int dvb_register(struct cx23885_tsport *port) } break; } + case CX23885_BOARD_HAUPPAUGE_QUADHD_DVB: + switch (port->nr) { + /* port b - Terrestrial/cable */ + case 1: + /* attach frontend */ + memset(&si2168_config, 0, sizeof(si2168_config)); + si2168_config.i2c_adapter = &adapter; + si2168_config.fe = &fe0->dvb.frontend; + si2168_config.ts_mode = SI2168_TS_SERIAL; + memset(&info, 0, sizeof(struct i2c_board_info)); + strlcpy(info.type, "si2168", I2C_NAME_SIZE); + info.addr = 0x64; + info.platform_data = &si2168_config; + request_module("%s", info.type); + client_demod = i2c_new_device(&dev->i2c_bus[0].i2c_adap, &info); + if (!client_demod || !client_demod->dev.driver) + goto frontend_detach; + if (!try_module_get(client_demod->dev.driver->owner)) { + i2c_unregister_device(client_demod); + goto frontend_detach; + } + port->i2c_client_demod = client_demod; + + /* attach tuner */ + memset(&si2157_config, 0, sizeof(si2157_config)); + si2157_config.fe = fe0->dvb.frontend; + si2157_config.if_port = 1; + memset(&info, 0, sizeof(struct i2c_board_info)); + strlcpy(info.type, "si2157", I2C_NAME_SIZE); + info.addr = 0x60; + info.platform_data = &si2157_config; + request_module("%s", info.type); + client_tuner = i2c_new_device(&dev->i2c_bus[1].i2c_adap, &info); + if (!client_tuner || !client_tuner->dev.driver) { + module_put(client_demod->dev.driver->owner); + i2c_unregister_device(client_demod); + port->i2c_client_demod = NULL; + goto frontend_detach; + } + if (!try_module_get(client_tuner->dev.driver->owner)) { + i2c_unregister_device(client_tuner); + module_put(client_demod->dev.driver->owner); + i2c_unregister_device(client_demod); + port->i2c_client_demod = NULL; + goto frontend_detach; + } + port->i2c_client_tuner = client_tuner; + break; + + /* port c - terrestrial/cable */ + case 2: + /* attach frontend */ + memset(&si2168_config, 0, sizeof(si2168_config)); + si2168_config.i2c_adapter = &adapter; + si2168_config.fe = &fe0->dvb.frontend; + si2168_config.ts_mode = SI2168_TS_SERIAL; + memset(&info, 0, sizeof(struct i2c_board_info)); + strlcpy(info.type, "si2168", I2C_NAME_SIZE); + info.addr = 0x66; + info.platform_data = &si2168_config; + request_module("%s", info.type); + client_demod = i2c_new_device(&dev->i2c_bus[0].i2c_adap, &info); + if (!client_demod || !client_demod->dev.driver) + goto frontend_detach; + if (!try_module_get(client_demod->dev.driver->owner)) { + i2c_unregister_device(client_demod); + goto frontend_detach; + } + port->i2c_client_demod = client_demod; + + /* attach tuner */ + memset(&si2157_config, 0, sizeof(si2157_config)); + si2157_config.fe = fe0->dvb.frontend; + si2157_config.if_port = 1; + memset(&info, 0, sizeof(struct i2c_board_info)); + strlcpy(info.type, "si2157", I2C_NAME_SIZE); + info.addr = 0x62; + info.platform_data = &si2157_config; + request_module("%s", info.type); + client_tuner = i2c_new_device(&dev->i2c_bus[1].i2c_adap, &info); + if (!client_tuner || !client_tuner->dev.driver) { + module_put(client_demod->dev.driver->owner); + i2c_unregister_device(client_demod); + port->i2c_client_demod = NULL; + goto frontend_detach; + } + if (!try_module_get(client_tuner->dev.driver->owner)) { + i2c_unregister_device(client_tuner); + module_put(client_demod->dev.driver->owner); + i2c_unregister_device(client_demod); + port->i2c_client_demod = NULL; + goto frontend_detach; + } + port->i2c_client_tuner = client_tuner; + break; + } + break; + default: printk(KERN_INFO "%s: The frontend of your DVB/ATSC card " - " isn't supported yet\n", + " isn't 
supported yet\n", dev->name); break; } @@ -2397,6 +2494,7 @@ int cx23885_dvb_register(struct cx23885_tsport *port) q->mem_ops = &vb2_dma_sg_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err < 0) diff --git a/drivers/media/pci/cx23885/cx23885-vbi.c b/drivers/media/pci/cx23885/cx23885-vbi.c index 39750ebcc04c..75e7fa7b1121 100644 --- a/drivers/media/pci/cx23885/cx23885-vbi.c +++ b/drivers/media/pci/cx23885/cx23885-vbi.c @@ -122,7 +122,7 @@ static int cx23885_start_vbi_dma(struct cx23885_dev *dev, static int queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct cx23885_dev *dev = q->drv_priv; unsigned lines = VBI_PAL_LINE_COUNT; @@ -131,7 +131,6 @@ static int queue_setup(struct vb2_queue *q, lines = VBI_NTSC_LINE_COUNT; *num_planes = 1; sizes[0] = lines * VBI_LINE_LENGTH * 2; - alloc_ctxs[0] = dev->alloc_ctx; return 0; } diff --git a/drivers/media/pci/cx23885/cx23885-video.c b/drivers/media/pci/cx23885/cx23885-video.c index e1d7d0847167..6d735222a958 100644 --- a/drivers/media/pci/cx23885/cx23885-video.c +++ b/drivers/media/pci/cx23885/cx23885-video.c @@ -335,13 +335,12 @@ static int cx23885_start_video_dma(struct cx23885_dev *dev, static int queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct cx23885_dev *dev = q->drv_priv; *num_planes = 1; sizes[0] = (dev->fmt->depth * dev->width * dev->height) >> 3; - alloc_ctxs[0] = dev->alloc_ctx; return 0; } @@ -1268,6 +1267,7 @@ int cx23885_video_register(struct cx23885_dev *dev) q->mem_ops = &vb2_dma_sg_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err < 0) @@ -1284,6 +1284,7 @@ int cx23885_video_register(struct cx23885_dev *dev) q->mem_ops = &vb2_dma_sg_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err < 0) diff --git a/drivers/media/pci/cx23885/cx23885.h b/drivers/media/pci/cx23885/cx23885.h index b1a5409408c7..24a0a6c5b501 100644 --- a/drivers/media/pci/cx23885/cx23885.h +++ b/drivers/media/pci/cx23885/cx23885.h @@ -103,6 +103,7 @@ #define CX23885_BOARD_HAUPPAUGE_STARBURST 53 #define CX23885_BOARD_VIEWCAST_260E 54 #define CX23885_BOARD_VIEWCAST_460E 55 +#define CX23885_BOARD_HAUPPAUGE_QUADHD_DVB 56 #define GPIO_0 0x00000001 #define GPIO_1 0x00000002 @@ -430,7 +431,6 @@ struct cx23885_dev { struct vb2_queue vb2_vidq; struct cx23885_dmaqueue vbiq; struct vb2_queue vb2_vbiq; - void *alloc_ctx; spinlock_t slock; diff --git a/drivers/media/pci/cx25821/cx25821-alsa.c b/drivers/media/pci/cx25821/cx25821-alsa.c index b602eba2b601..df189b16af12 100644 --- a/drivers/media/pci/cx25821/cx25821-alsa.c +++ b/drivers/media/pci/cx25821/cx25821-alsa.c @@ -693,7 +693,7 @@ static int snd_cx25821_pcm(struct cx25821_audio_dev *chip, int device, * Only boards with eeprom and byte 1 at eeprom=1 have it */ -static const struct pci_device_id cx25821_audio_pci_tbl[] = { +static const struct pci_device_id __maybe_unused cx25821_audio_pci_tbl[] = { {0x14f1, 0x0920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {0,} }; diff --git a/drivers/media/pci/cx25821/cx25821-core.c b/drivers/media/pci/cx25821/cx25821-core.c index 
0042803a9de7..9a5f912ca859 100644 --- a/drivers/media/pci/cx25821/cx25821-core.c +++ b/drivers/media/pci/cx25821/cx25821-core.c @@ -1301,15 +1301,10 @@ static int cx25821_initdev(struct pci_dev *pci_dev, goto fail_unregister_device; } - dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev); - if (IS_ERR(dev->alloc_ctx)) { - err = PTR_ERR(dev->alloc_ctx); - goto fail_unregister_pci; - } err = cx25821_dev_setup(dev); if (err) - goto fail_free_ctx; + goto fail_unregister_pci; /* print pci info */ pci_read_config_byte(pci_dev, PCI_CLASS_REVISION, &dev->pci_rev); @@ -1340,8 +1335,6 @@ fail_irq: pr_info("cx25821_initdev() can't get IRQ !\n"); cx25821_dev_unregister(dev); -fail_free_ctx: - vb2_dma_sg_cleanup_ctx(dev->alloc_ctx); fail_unregister_pci: pci_disable_device(pci_dev); fail_unregister_device: @@ -1365,7 +1358,6 @@ static void cx25821_finidev(struct pci_dev *pci_dev) free_irq(pci_dev->irq, dev); cx25821_dev_unregister(dev); - vb2_dma_sg_cleanup_ctx(dev->alloc_ctx); v4l2_device_unregister(v4l2_dev); kfree(dev); } diff --git a/drivers/media/pci/cx25821/cx25821-video.c b/drivers/media/pci/cx25821/cx25821-video.c index c48bba9daf1f..adcd09be347d 100644 --- a/drivers/media/pci/cx25821/cx25821-video.c +++ b/drivers/media/pci/cx25821/cx25821-video.c @@ -143,13 +143,11 @@ int cx25821_video_irq(struct cx25821_dev *dev, int chan_num, u32 status) static int cx25821_queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct cx25821_channel *chan = q->drv_priv; unsigned size = (chan->fmt->depth * chan->width * chan->height) >> 3; - alloc_ctxs[0] = chan->dev->alloc_ctx; - if (*num_planes) return sizes[0] < size ? -EINVAL : 0; @@ -759,6 +757,7 @@ int cx25821_video_register(struct cx25821_dev *dev) q->mem_ops = &vb2_dma_sg_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; if (!is_output) { err = vb2_queue_init(q); diff --git a/drivers/media/pci/cx25821/cx25821.h b/drivers/media/pci/cx25821/cx25821.h index a513b68be0fa..35c7375e4617 100644 --- a/drivers/media/pci/cx25821/cx25821.h +++ b/drivers/media/pci/cx25821/cx25821.h @@ -249,7 +249,6 @@ struct cx25821_dev { int hwrevision; /* used by cx25821-alsa */ struct snd_card *card; - void *alloc_ctx; u32 clk_freq; diff --git a/drivers/media/pci/cx88/cx88-alsa.c b/drivers/media/pci/cx88/cx88-alsa.c index e158a1da1d41..f3f13eb0c16e 100644 --- a/drivers/media/pci/cx88/cx88-alsa.c +++ b/drivers/media/pci/cx88/cx88-alsa.c @@ -799,13 +799,9 @@ static int snd_cx88_alc_put(struct snd_kcontrol *kcontrol, { snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol); struct cx88_core *core = chip->core; - struct v4l2_control client_ctl; - - memset(&client_ctl, 0, sizeof(client_ctl)); - client_ctl.value = 0 != value->value.integer.value[0]; - client_ctl.id = V4L2_CID_AUDIO_LOUDNESS; - call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl); + wm8775_s_ctrl(core, V4L2_CID_AUDIO_LOUDNESS, + value->value.integer.value[0] != 0); return 0; } diff --git a/drivers/media/pci/cx88/cx88-blackbird.c b/drivers/media/pci/cx88/cx88-blackbird.c index 3233d45d1e5b..04fe9af2a802 100644 --- a/drivers/media/pci/cx88/cx88-blackbird.c +++ b/drivers/media/pci/cx88/cx88-blackbird.c @@ -639,7 +639,7 @@ static int blackbird_stop_codec(struct cx8802_dev *dev) static int queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int 
sizes[], struct device *alloc_devs[]) { struct cx8802_dev *dev = q->drv_priv; @@ -647,7 +647,6 @@ static int queue_setup(struct vb2_queue *q, dev->ts_packet_size = 188 * 4; dev->ts_packet_count = 32; sizes[0] = dev->ts_packet_size * dev->ts_packet_count; - alloc_ctxs[0] = dev->alloc_ctx; return 0; } @@ -1183,6 +1182,7 @@ static int cx8802_blackbird_probe(struct cx8802_driver *drv) q->mem_ops = &vb2_dma_sg_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &core->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err < 0) diff --git a/drivers/media/pci/cx88/cx88-dvb.c b/drivers/media/pci/cx88/cx88-dvb.c index 851d2a9caed3..5bb63e7a5691 100644 --- a/drivers/media/pci/cx88/cx88-dvb.c +++ b/drivers/media/pci/cx88/cx88-dvb.c @@ -84,7 +84,7 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); static int queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct cx8802_dev *dev = q->drv_priv; @@ -92,7 +92,6 @@ static int queue_setup(struct vb2_queue *q, dev->ts_packet_size = 188 * 4; dev->ts_packet_count = dvb_buf_tscnt; sizes[0] = dev->ts_packet_size * dev->ts_packet_count; - alloc_ctxs[0] = dev->alloc_ctx; *num_buffers = dvb_buf_tscnt; return 0; } @@ -1793,6 +1792,7 @@ static int cx8802_dvb_probe(struct cx8802_driver *drv) q->mem_ops = &vb2_dma_sg_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &core->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err < 0) diff --git a/drivers/media/pci/cx88/cx88-mpeg.c b/drivers/media/pci/cx88/cx88-mpeg.c index f34c229f9b37..245357adbc25 100644 --- a/drivers/media/pci/cx88/cx88-mpeg.c +++ b/drivers/media/pci/cx88/cx88-mpeg.c @@ -726,11 +726,6 @@ static int cx8802_probe(struct pci_dev *pci_dev, if (NULL == dev) goto fail_core; dev->pci = pci_dev; - dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev); - if (IS_ERR(dev->alloc_ctx)) { - err = PTR_ERR(dev->alloc_ctx); - goto fail_dev; - } dev->core = core; /* Maintain a reference so cx88-video can query the 8802 device. 
*/ @@ -738,7 +733,7 @@ static int cx8802_probe(struct pci_dev *pci_dev, err = cx8802_init_common(dev); if (err != 0) - goto fail_free; + goto fail_dev; INIT_LIST_HEAD(&dev->drvlist); mutex_lock(&cx8802_mutex); @@ -749,8 +744,6 @@ static int cx8802_probe(struct pci_dev *pci_dev, request_modules(dev); return 0; - fail_free: - vb2_dma_sg_cleanup_ctx(dev->alloc_ctx); fail_dev: kfree(dev); fail_core: @@ -798,7 +791,6 @@ static void cx8802_remove(struct pci_dev *pci_dev) /* common */ cx8802_fini_common(dev); cx88_core_put(dev->core,dev->pci); - vb2_dma_sg_cleanup_ctx(dev->alloc_ctx); kfree(dev); } diff --git a/drivers/media/pci/cx88/cx88-vbi.c b/drivers/media/pci/cx88/cx88-vbi.c index ccc646d819f2..d3237cf8ffa3 100644 --- a/drivers/media/pci/cx88/cx88-vbi.c +++ b/drivers/media/pci/cx88/cx88-vbi.c @@ -109,7 +109,7 @@ int cx8800_restart_vbi_queue(struct cx8800_dev *dev, static int queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct cx8800_dev *dev = q->drv_priv; @@ -118,7 +118,6 @@ static int queue_setup(struct vb2_queue *q, sizes[0] = VBI_LINE_NTSC_COUNT * VBI_LINE_LENGTH * 2; else sizes[0] = VBI_LINE_PAL_COUNT * VBI_LINE_LENGTH * 2; - alloc_ctxs[0] = dev->alloc_ctx; return 0; } diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c index 5f331df65fb9..5dc1e3f08d50 100644 --- a/drivers/media/pci/cx88/cx88-video.c +++ b/drivers/media/pci/cx88/cx88-video.c @@ -431,14 +431,13 @@ static int restart_video_queue(struct cx8800_dev *dev, static int queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct cx8800_dev *dev = q->drv_priv; struct cx88_core *core = dev->core; *num_planes = 1; sizes[0] = (dev->fmt->depth * core->width * core->height) >> 3; - alloc_ctxs[0] = dev->alloc_ctx; return 0; } @@ -1319,12 +1318,6 @@ static int cx8800_initdev(struct pci_dev *pci_dev, printk("%s/0: Oops: no 32bit PCI DMA ???\n",core->name); goto fail_core; } - dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev); - if (IS_ERR(dev->alloc_ctx)) { - err = PTR_ERR(dev->alloc_ctx); - goto fail_core; - } - /* initialize driver struct */ spin_lock_init(&dev->slock); @@ -1445,6 +1438,7 @@ static int cx8800_initdev(struct pci_dev *pci_dev, q->mem_ops = &vb2_dma_sg_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &core->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err < 0) @@ -1461,6 +1455,7 @@ static int cx8800_initdev(struct pci_dev *pci_dev, q->mem_ops = &vb2_dma_sg_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &core->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err < 0) @@ -1530,7 +1525,6 @@ fail_unreg: free_irq(pci_dev->irq, dev); mutex_unlock(&core->lock); fail_core: - vb2_dma_sg_cleanup_ctx(dev->alloc_ctx); core->v4ldev = NULL; cx88_core_put(core,dev->pci); fail_free: @@ -1564,7 +1558,6 @@ static void cx8800_finidev(struct pci_dev *pci_dev) /* free memory */ cx88_core_put(core,dev->pci); - vb2_dma_sg_cleanup_ctx(dev->alloc_ctx); kfree(dev); } diff --git a/drivers/media/pci/cx88/cx88.h b/drivers/media/pci/cx88/cx88.h index 78f817ee7e41..ecd4b7bece99 100644 --- a/drivers/media/pci/cx88/cx88.h +++ b/drivers/media/pci/cx88/cx88.h @@ -485,7 +485,6 @@ struct cx8800_dev { /* pci i/o */ struct pci_dev *pci; unsigned char pci_rev,pci_lat; - void 
*alloc_ctx; const struct cx8800_fmt *fmt; @@ -549,7 +548,6 @@ struct cx8802_dev { /* pci i/o */ struct pci_dev *pci; unsigned char pci_rev,pci_lat; - void *alloc_ctx; /* dma queues */ struct cx88_dmaqueue mpegq; diff --git a/drivers/media/pci/ddbridge/ddbridge-core.c b/drivers/media/pci/ddbridge/ddbridge-core.c index 6e995ef8c37e..47def73b3502 100644 --- a/drivers/media/pci/ddbridge/ddbridge-core.c +++ b/drivers/media/pci/ddbridge/ddbridge-core.c @@ -1569,10 +1569,9 @@ static int ddb_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (pci_enable_device(pdev) < 0) return -ENODEV; - dev = vmalloc(sizeof(struct ddb)); + dev = vzalloc(sizeof(struct ddb)); if (dev == NULL) return -ENOMEM; - memset(dev, 0, sizeof(struct ddb)); dev->pdev = pdev; pci_set_drvdata(pdev, dev); diff --git a/drivers/media/pci/dt3155/dt3155.c b/drivers/media/pci/dt3155/dt3155.c index 568c0c8fb2dc..6a219694b225 100644 --- a/drivers/media/pci/dt3155/dt3155.c +++ b/drivers/media/pci/dt3155/dt3155.c @@ -133,7 +133,7 @@ static int wait_i2c_reg(void __iomem *addr) static int dt3155_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct dt3155_priv *pd = vb2_get_drv_priv(vq); @@ -141,7 +141,6 @@ dt3155_queue_setup(struct vb2_queue *vq, if (vq->num_buffers + *nbuffers < 2) *nbuffers = 2 - vq->num_buffers; - alloc_ctxs[0] = pd->alloc_ctx; if (*num_planes) return sizes[0] < size ? -EINVAL : 0; *num_planes = 1; @@ -544,21 +543,16 @@ static int dt3155_probe(struct pci_dev *pdev, const struct pci_device_id *id) pd->vidq.min_buffers_needed = 2; pd->vidq.gfp_flags = GFP_DMA32; pd->vidq.lock = &pd->mux; /* for locking v4l2_file_operations */ + pd->vidq.dev = &pdev->dev; pd->vdev.queue = &pd->vidq; err = vb2_queue_init(&pd->vidq); if (err < 0) goto err_v4l2_dev_unreg; - pd->alloc_ctx = vb2_dma_contig_init_ctx(&pdev->dev); - if (IS_ERR(pd->alloc_ctx)) { - dev_err(&pdev->dev, "Can't allocate buffer context"); - err = PTR_ERR(pd->alloc_ctx); - goto err_v4l2_dev_unreg; - } spin_lock_init(&pd->lock); pd->config = ACQ_MODE_EVEN; err = pci_enable_device(pdev); if (err) - goto err_free_ctx; + goto err_v4l2_dev_unreg; err = pci_request_region(pdev, 0, pci_name(pdev)); if (err) goto err_pci_disable; @@ -588,8 +582,6 @@ err_free_reg: pci_release_region(pdev, 0); err_pci_disable: pci_disable_device(pdev); -err_free_ctx: - vb2_dma_contig_cleanup_ctx(pd->alloc_ctx); err_v4l2_dev_unreg: v4l2_device_unregister(&pd->v4l2_dev); return err; @@ -608,7 +600,6 @@ static void dt3155_remove(struct pci_dev *pdev) pci_iounmap(pdev, pd->regs); pci_release_region(pdev, 0); pci_disable_device(pdev); - vb2_dma_contig_cleanup_ctx(pd->alloc_ctx); } static const struct pci_device_id pci_ids[] = { diff --git a/drivers/media/pci/dt3155/dt3155.h b/drivers/media/pci/dt3155/dt3155.h index b3531e0bc733..39442e58919d 100644 --- a/drivers/media/pci/dt3155/dt3155.h +++ b/drivers/media/pci/dt3155/dt3155.h @@ -161,7 +161,6 @@ * @vdev: video_device structure * @pdev: pointer to pci_dev structure * @vidq: vb2_queue structure - * @alloc_ctx: dma_contig allocation context * @curr_buf: pointer to curren buffer * @mux: mutex to protect the instance * @dmaq: queue for dma buffers @@ -181,7 +180,6 @@ struct dt3155_priv { struct video_device vdev; struct pci_dev *pdev; struct vb2_queue vidq; - struct vb2_alloc_ctx *alloc_ctx; struct vb2_v4l2_buffer *curr_buf; struct mutex mux; struct list_head dmaq; diff --git 
a/drivers/media/pci/ivtv/ivtv-alsa-mixer.c b/drivers/media/pci/ivtv/ivtv-alsa-mixer.c index 33ec05b09af3..79b24bde4a39 100644 --- a/drivers/media/pci/ivtv/ivtv-alsa-mixer.c +++ b/drivers/media/pci/ivtv/ivtv-alsa-mixer.c @@ -93,7 +93,7 @@ static int snd_ivtv_mixer_tv_vol_get(struct snd_kcontrol *kctl, vctrl.value = dB_to_cx25840_vol(uctl->value.integer.value[0]); snd_ivtv_lock(itvsc); - ret = v4l2_subdev_call(itv->sd_audio, core, g_ctrl, &vctrl); + ret = v4l2_g_ctrl(itv->sd_audio->ctrl_handler, &vctrl); snd_ivtv_unlock(itvsc); if (!ret) @@ -115,14 +115,14 @@ static int snd_ivtv_mixer_tv_vol_put(struct snd_kcontrol *kctl, snd_ivtv_lock(itvsc); /* Fetch current state */ - ret = v4l2_subdev_call(itv->sd_audio, core, g_ctrl, &vctrl); + ret = v4l2_g_ctrl(itv->sd_audio->ctrl_handler, &vctrl); if (ret || (cx25840_vol_to_dB(vctrl.value) != uctl->value.integer.value[0])) { /* Set, if needed */ vctrl.value = dB_to_cx25840_vol(uctl->value.integer.value[0]); - ret = v4l2_subdev_call(itv->sd_audio, core, s_ctrl, &vctrl); + ret = v4l2_s_ctrl(itv->sd_audio->ctrl_handler, &vctrl); if (!ret) ret = 1; /* Indicate control was changed w/o error */ } diff --git a/drivers/media/pci/netup_unidvb/Kconfig b/drivers/media/pci/netup_unidvb/Kconfig index f277b0b10c2d..0ad37714c7fd 100644 --- a/drivers/media/pci/netup_unidvb/Kconfig +++ b/drivers/media/pci/netup_unidvb/Kconfig @@ -5,8 +5,13 @@ config DVB_NETUP_UNIDVB select VIDEOBUF2_VMALLOC select DVB_HORUS3A if MEDIA_SUBDRV_AUTOSELECT select DVB_ASCOT2E if MEDIA_SUBDRV_AUTOSELECT + select DVB_HELENE if MEDIA_SUBDRV_AUTOSELECT select DVB_LNBH25 if MEDIA_SUBDRV_AUTOSELECT select DVB_CXD2841ER if MEDIA_SUBDRV_AUTOSELECT ---help--- Support for NetUP PCI express Universal DVB card. - + help + Say Y when you want to support NetUP Dual Universal DVB card + Card can receive two independent streams in following standards: + DVB-S/S2, T/T2, C/C2 + Two CI slots available for CAM modules. 
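The NetUP changes that follow hinge on a board-revision split: the header hunk below maps PCI device IDs 0x18f6 and 0x18f7 to hardware revisions 1.3 and 1.4, and the probe and frontend-init paths then pick the demodulator crystal and tuner driver (ASCOT2E/HORUS3A for 1.3, HELENE for 1.4) from that revision. A rough stand-alone sketch of that dispatch pattern, with illustrative names that are not part of the driver:

	/* Minimal sketch, assuming only the two PCI IDs shown in the
	 * diff below; detect_rev() mirrors the driver's "not 1.3 means
	 * 1.4" fallback but is not driver code. */
	#include <stdio.h>

	enum hw_rev {
		HW_REV_1_3 = 0x18f6,	/* CXD2841ER demod, ASCOT2E/HORUS3A tuners */
		HW_REV_1_4 = 0x18f7,	/* CXD2854ER demod, HELENE tuner */
	};

	static enum hw_rev detect_rev(unsigned int pci_device_id)
	{
		/* Any ID other than the rev 1.3 one is treated as rev 1.4. */
		return pci_device_id == HW_REV_1_3 ? HW_REV_1_3 : HW_REV_1_4;
	}

	int main(void)
	{
		printf("0x18f6 -> rev 1.%d\n", detect_rev(0x18f6) == HW_REV_1_3 ? 3 : 4);
		printf("0x18f7 -> rev 1.%d\n", detect_rev(0x18f7) == HW_REV_1_3 ? 3 : 4);
		return 0;
	}
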
diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb.h b/drivers/media/pci/netup_unidvb/netup_unidvb.h index a67b28111905..39b08ecda1fc 100644 --- a/drivers/media/pci/netup_unidvb/netup_unidvb.h +++ b/drivers/media/pci/netup_unidvb/netup_unidvb.h @@ -50,6 +50,15 @@ #define NETUP_UNIDVB_IRQ_CAM0 (1 << 11) #define NETUP_UNIDVB_IRQ_CAM1 (1 << 12) +/* NetUP Universal DVB card hardware revisions and it's PCI device id's: + * 1.3 - CXD2841ER demod, ASCOT2E and HORUS3A tuners + * 1.4 - CXD2854ER demod, HELENE tuner +*/ +enum netup_hw_rev { + NETUP_HW_REV_1_3 = 0x18F6, + NETUP_HW_REV_1_4 = 0x18F7 +}; + struct netup_dma { u8 num; spinlock_t lock; @@ -119,6 +128,7 @@ struct netup_unidvb_dev { struct netup_dma dma[2]; struct netup_ci_state ci[2]; struct netup_spi *spi; + enum netup_hw_rev rev; }; int netup_i2c_register(struct netup_unidvb_dev *ndev); diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_ci.c b/drivers/media/pci/netup_unidvb/netup_unidvb_ci.c index f46ffac66ee9..f535270c2116 100644 --- a/drivers/media/pci/netup_unidvb/netup_unidvb_ci.c +++ b/drivers/media/pci/netup_unidvb/netup_unidvb_ci.c @@ -147,7 +147,7 @@ static int netup_unidvb_ci_read_attribute_mem(struct dvb_ca_en50221 *en50221, { struct netup_ci_state *state = en50221->data; struct netup_unidvb_dev *dev = state->dev; - u8 val = *((u8 __force *)state->membase8_io + addr); + u8 val = *((u8 __force *)state->membase8_config + addr); dev_dbg(&dev->pci_dev->dev, "%s(): addr=0x%x val=0x%x\n", __func__, addr, val); @@ -162,7 +162,7 @@ static int netup_unidvb_ci_write_attribute_mem(struct dvb_ca_en50221 *en50221, dev_dbg(&dev->pci_dev->dev, "%s(): addr=0x%x data=0x%x\n", __func__, addr, data); - *((u8 __force *)state->membase8_io + addr) = data; + *((u8 __force *)state->membase8_config + addr) = data; return 0; } diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c index 2b667b315913..ac547cb84de8 100644 --- a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c +++ b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c @@ -34,6 +34,7 @@ #include "cxd2841er.h" #include "horus3a.h" #include "ascot2e.h" +#include "helene.h" #include "lnbh25.h" static int spi_enable; @@ -120,7 +121,8 @@ static int netup_unidvb_tuner_ctrl(void *priv, int is_dvb_tc); static void netup_unidvb_queue_cleanup(struct netup_dma *dma); static struct cxd2841er_config demod_config = { - .i2c_addr = 0xc8 + .i2c_addr = 0xc8, + .xtal = SONY_XTAL_24000 }; static struct horus3a_config horus3a_conf = { @@ -134,6 +136,12 @@ static struct ascot2e_config ascot2e_conf = { .set_tuner_callback = netup_unidvb_tuner_ctrl }; +static struct helene_config helene_conf = { + .i2c_address = 0xc0, + .xtal = SONY_HELENE_XTAL_24000, + .set_tuner_callback = netup_unidvb_tuner_ctrl +}; + static struct lnbh25_config lnbh25_conf = { .i2c_address = 0x10, .data2_config = LNBH25_TEN | LNBH25_EXTM @@ -152,6 +160,11 @@ static int netup_unidvb_tuner_ctrl(void *priv, int is_dvb_tc) __func__, dma->num, is_dvb_tc); reg = readb(ndev->bmmio0 + GPIO_REG_IO); mask = (dma->num == 0) ? GPIO_RFA_CTL : GPIO_RFB_CTL; + + /* inverted tuner control in hw rev. 
1.4 */ + if (ndev->rev == NETUP_HW_REV_1_4) + is_dvb_tc = !is_dvb_tc; + if (!is_dvb_tc) reg |= mask; else @@ -280,7 +293,7 @@ static int netup_unidvb_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], - void *alloc_ctxs[]) + struct device *alloc_devs[]) { struct netup_dma *dma = vb2_get_drv_priv(vq); @@ -372,7 +385,15 @@ static int netup_unidvb_queue_init(struct netup_dma *dma, static int netup_unidvb_dvb_init(struct netup_unidvb_dev *ndev, int num) { - struct vb2_dvb_frontend *fe0, *fe1, *fe2; + int fe_count = 2; + int i = 0; + struct vb2_dvb_frontend *fes[2]; + u8 fe_name[32]; + + if (ndev->rev == NETUP_HW_REV_1_3) + demod_config.xtal = SONY_XTAL_20500; + else + demod_config.xtal = SONY_XTAL_24000; if (num < 0 || num > 1) { dev_dbg(&ndev->pci_dev->dev, @@ -381,84 +402,96 @@ static int netup_unidvb_dvb_init(struct netup_unidvb_dev *ndev, } mutex_init(&ndev->frontends[num].lock); INIT_LIST_HEAD(&ndev->frontends[num].felist); - if (vb2_dvb_alloc_frontend(&ndev->frontends[num], 1) == NULL || - vb2_dvb_alloc_frontend( - &ndev->frontends[num], 2) == NULL || - vb2_dvb_alloc_frontend( - &ndev->frontends[num], 3) == NULL) { - dev_dbg(&ndev->pci_dev->dev, - "%s(): unable to allocate vb2_dvb_frontend\n", - __func__); - return -ENOMEM; + + for (i = 0; i < fe_count; i++) { + if (vb2_dvb_alloc_frontend(&ndev->frontends[num], i+1) + == NULL) { + dev_err(&ndev->pci_dev->dev, + "%s(): unable to allocate vb2_dvb_frontend\n", + __func__); + return -ENOMEM; + } } - fe0 = vb2_dvb_get_frontend(&ndev->frontends[num], 1); - fe1 = vb2_dvb_get_frontend(&ndev->frontends[num], 2); - fe2 = vb2_dvb_get_frontend(&ndev->frontends[num], 3); - if (fe0 == NULL || fe1 == NULL || fe2 == NULL) { - dev_dbg(&ndev->pci_dev->dev, - "%s(): frontends has not been allocated\n", __func__); - return -EINVAL; + + for (i = 0; i < fe_count; i++) { + fes[i] = vb2_dvb_get_frontend(&ndev->frontends[num], i+1); + if (fes[i] == NULL) { + dev_err(&ndev->pci_dev->dev, + "%s(): frontends has not been allocated\n", + __func__); + return -EINVAL; + } + } + + for (i = 0; i < fe_count; i++) { + netup_unidvb_queue_init(&ndev->dma[num], &fes[i]->dvb.dvbq); + snprintf(fe_name, sizeof(fe_name), "netup_fe%d", i); + fes[i]->dvb.name = fe_name; } - netup_unidvb_queue_init(&ndev->dma[num], &fe0->dvb.dvbq); - netup_unidvb_queue_init(&ndev->dma[num], &fe1->dvb.dvbq); - netup_unidvb_queue_init(&ndev->dma[num], &fe2->dvb.dvbq); - fe0->dvb.name = "netup_fe0"; - fe1->dvb.name = "netup_fe1"; - fe2->dvb.name = "netup_fe2"; - fe0->dvb.frontend = dvb_attach(cxd2841er_attach_s, + + fes[0]->dvb.frontend = dvb_attach(cxd2841er_attach_s, &demod_config, &ndev->i2c[num].adap); - if (fe0->dvb.frontend == NULL) { + if (fes[0]->dvb.frontend == NULL) { dev_dbg(&ndev->pci_dev->dev, "%s(): unable to attach DVB-S/S2 frontend\n", __func__); goto frontend_detach; } - horus3a_conf.set_tuner_priv = &ndev->dma[num]; - if (!dvb_attach(horus3a_attach, fe0->dvb.frontend, - &horus3a_conf, &ndev->i2c[num].adap)) { - dev_dbg(&ndev->pci_dev->dev, - "%s(): unable to attach DVB-S/S2 tuner frontend\n", - __func__); - goto frontend_detach; + + if (ndev->rev == NETUP_HW_REV_1_3) { + horus3a_conf.set_tuner_priv = &ndev->dma[num]; + if (!dvb_attach(horus3a_attach, fes[0]->dvb.frontend, + &horus3a_conf, &ndev->i2c[num].adap)) { + dev_dbg(&ndev->pci_dev->dev, + "%s(): unable to attach HORUS3A DVB-S/S2 tuner frontend\n", + __func__); + goto frontend_detach; + } + } else { + helene_conf.set_tuner_priv = &ndev->dma[num]; + if 
(!dvb_attach(helene_attach_s, fes[0]->dvb.frontend, + &helene_conf, &ndev->i2c[num].adap)) { + dev_err(&ndev->pci_dev->dev, + "%s(): unable to attach HELENE DVB-S/S2 tuner frontend\n", + __func__); + goto frontend_detach; + } } - if (!dvb_attach(lnbh25_attach, fe0->dvb.frontend, + + if (!dvb_attach(lnbh25_attach, fes[0]->dvb.frontend, &lnbh25_conf, &ndev->i2c[num].adap)) { dev_dbg(&ndev->pci_dev->dev, "%s(): unable to attach SEC frontend\n", __func__); goto frontend_detach; } + /* DVB-T/T2 frontend */ - fe1->dvb.frontend = dvb_attach(cxd2841er_attach_t, + fes[1]->dvb.frontend = dvb_attach(cxd2841er_attach_t_c, &demod_config, &ndev->i2c[num].adap); - if (fe1->dvb.frontend == NULL) { - dev_dbg(&ndev->pci_dev->dev, - "%s(): unable to attach DVB-T frontend\n", __func__); - goto frontend_detach; - } - fe1->dvb.frontend->id = 1; - ascot2e_conf.set_tuner_priv = &ndev->dma[num]; - if (!dvb_attach(ascot2e_attach, fe1->dvb.frontend, - &ascot2e_conf, &ndev->i2c[num].adap)) { - dev_dbg(&ndev->pci_dev->dev, - "%s(): unable to attach DVB-T tuner frontend\n", - __func__); - goto frontend_detach; - } - /* DVB-C/C2 frontend */ - fe2->dvb.frontend = dvb_attach(cxd2841er_attach_c, - &demod_config, &ndev->i2c[num].adap); - if (fe2->dvb.frontend == NULL) { + if (fes[1]->dvb.frontend == NULL) { dev_dbg(&ndev->pci_dev->dev, - "%s(): unable to attach DVB-C frontend\n", __func__); + "%s(): unable to attach Ter frontend\n", __func__); goto frontend_detach; } - fe2->dvb.frontend->id = 2; - if (!dvb_attach(ascot2e_attach, fe2->dvb.frontend, - &ascot2e_conf, &ndev->i2c[num].adap)) { - dev_dbg(&ndev->pci_dev->dev, - "%s(): unable to attach DVB-T/C tuner frontend\n", - __func__); - goto frontend_detach; + fes[1]->dvb.frontend->id = 1; + if (ndev->rev == NETUP_HW_REV_1_3) { + ascot2e_conf.set_tuner_priv = &ndev->dma[num]; + if (!dvb_attach(ascot2e_attach, fes[1]->dvb.frontend, + &ascot2e_conf, &ndev->i2c[num].adap)) { + dev_dbg(&ndev->pci_dev->dev, + "%s(): unable to attach Ter tuner frontend\n", + __func__); + goto frontend_detach; + } + } else { + helene_conf.set_tuner_priv = &ndev->dma[num]; + if (!dvb_attach(helene_attach, fes[1]->dvb.frontend, + &helene_conf, &ndev->i2c[num].adap)) { + dev_err(&ndev->pci_dev->dev, + "%s(): unable to attach HELENE Ter tuner frontend\n", + __func__); + goto frontend_detach; + } } if (vb2_dvb_register_bus(&ndev->frontends[num], @@ -730,7 +763,7 @@ static int netup_unidvb_request_mmio(struct pci_dev *pci_dev) static int netup_unidvb_request_modules(struct device *dev) { static const char * const modules[] = { - "lnbh25", "ascot2e", "horus3a", "cxd2841er", NULL + "lnbh25", "ascot2e", "horus3a", "cxd2841er", "helene", NULL }; const char * const *curr_mod = modules; int err; @@ -774,6 +807,16 @@ static int netup_unidvb_initdev(struct pci_dev *pci_dev, if (!ndev) goto dev_alloc_err; + /* detect hardware revision */ + if (pci_dev->device == NETUP_HW_REV_1_3) + ndev->rev = NETUP_HW_REV_1_3; + else + ndev->rev = NETUP_HW_REV_1_4; + + dev_info(&pci_dev->dev, + "%s(): board (0x%x) hardware revision 0x%x\n", + __func__, pci_dev->device, ndev->rev); + ndev->old_fw = old_firmware; ndev->wq = create_singlethread_workqueue(NETUP_UNIDVB_NAME); if (!ndev->wq) { @@ -932,7 +975,7 @@ wq_create_err: kfree(ndev); dev_alloc_err: dev_err(&pci_dev->dev, - "%s(): failed to initizalize device\n", __func__); + "%s(): failed to initialize device\n", __func__); return -EIO; } @@ -972,7 +1015,8 @@ static void netup_unidvb_finidev(struct pci_dev *pci_dev) static struct pci_device_id netup_unidvb_pci_tbl[] = { - { 
PCI_DEVICE(0x1b55, 0x18f6) }, + { PCI_DEVICE(0x1b55, 0x18f6) }, /* hw rev. 1.3 */ + { PCI_DEVICE(0x1b55, 0x18f7) }, /* hw rev. 1.4 */ { 0, } }; MODULE_DEVICE_TABLE(pci, netup_unidvb_pci_tbl); diff --git a/drivers/media/pci/saa7134/saa7134-core.c b/drivers/media/pci/saa7134/saa7134-core.c index c0e1780ec831..ffb66a9ae23e 100644 --- a/drivers/media/pci/saa7134/saa7134-core.c +++ b/drivers/media/pci/saa7134/saa7134-core.c @@ -1164,18 +1164,13 @@ static int saa7134_initdev(struct pci_dev *pci_dev, saa7134_board_init1(dev); saa7134_hwinit1(dev); - dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev); - if (IS_ERR(dev->alloc_ctx)) { - err = PTR_ERR(dev->alloc_ctx); - goto fail3; - } /* get irq */ err = request_irq(pci_dev->irq, saa7134_irq, IRQF_SHARED, dev->name, dev); if (err < 0) { pr_err("%s: can't get IRQ %d\n", dev->name,pci_dev->irq); - goto fail4; + goto fail3; } /* wait a bit, register i2c bus */ @@ -1233,7 +1228,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev, if (err < 0) { pr_info("%s: can't register video device\n", dev->name); - goto fail5; + goto fail4; } pr_info("%s: registered device %s [v4l2]\n", dev->name, video_device_node_name(dev->video_dev)); @@ -1246,7 +1241,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev, err = video_register_device(dev->vbi_dev,VFL_TYPE_VBI, vbi_nr[dev->nr]); if (err < 0) - goto fail5; + goto fail4; pr_info("%s: registered device %s\n", dev->name, video_device_node_name(dev->vbi_dev)); @@ -1257,7 +1252,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev, err = video_register_device(dev->radio_dev,VFL_TYPE_RADIO, radio_nr[dev->nr]); if (err < 0) - goto fail5; + goto fail4; pr_info("%s: registered device %s\n", dev->name, video_device_node_name(dev->radio_dev)); } @@ -1268,7 +1263,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev, err = v4l2_mc_create_media_graph(dev->media_dev); if (err) { pr_err("failed to create media graph\n"); - goto fail5; + goto fail4; } #endif /* everything worked */ @@ -1287,17 +1282,15 @@ static int saa7134_initdev(struct pci_dev *pci_dev, #ifdef CONFIG_MEDIA_CONTROLLER err = media_device_register(dev->media_dev); if (err) - goto fail5; + goto fail4; #endif return 0; - fail5: + fail4: saa7134_unregister_video(dev); saa7134_i2c_unregister(dev); free_irq(pci_dev->irq, dev); - fail4: - vb2_dma_sg_cleanup_ctx(dev->alloc_ctx); fail3: saa7134_hwfini(dev); iounmap(dev->lmmio); @@ -1367,7 +1360,6 @@ static void saa7134_finidev(struct pci_dev *pci_dev) /* release resources */ free_irq(pci_dev->irq, dev); - vb2_dma_sg_cleanup_ctx(dev->alloc_ctx); iounmap(dev->lmmio); release_mem_region(pci_resource_start(pci_dev,0), pci_resource_len(pci_dev,0)); diff --git a/drivers/media/pci/saa7134/saa7134-ts.c b/drivers/media/pci/saa7134/saa7134-ts.c index 0584a2adbe99..7eaf36a41db9 100644 --- a/drivers/media/pci/saa7134/saa7134-ts.c +++ b/drivers/media/pci/saa7134/saa7134-ts.c @@ -118,7 +118,7 @@ EXPORT_SYMBOL_GPL(saa7134_ts_buffer_prepare); int saa7134_ts_queue_setup(struct vb2_queue *q, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct saa7134_dmaqueue *dmaq = q->drv_priv; struct saa7134_dev *dev = dmaq->dev; @@ -131,7 +131,6 @@ int saa7134_ts_queue_setup(struct vb2_queue *q, *nbuffers = 3; *nplanes = 1; sizes[0] = size; - alloc_ctxs[0] = dev->alloc_ctx; return 0; } EXPORT_SYMBOL_GPL(saa7134_ts_queue_setup); diff --git a/drivers/media/pci/saa7134/saa7134-vbi.c b/drivers/media/pci/saa7134/saa7134-vbi.c index 
e76da37c4a8a..cf9a31e0a390 100644 --- a/drivers/media/pci/saa7134/saa7134-vbi.c +++ b/drivers/media/pci/saa7134/saa7134-vbi.c @@ -140,7 +140,7 @@ static int buffer_prepare(struct vb2_buffer *vb2) static int queue_setup(struct vb2_queue *q, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct saa7134_dmaqueue *dmaq = q->drv_priv; struct saa7134_dev *dev = dmaq->dev; @@ -155,7 +155,6 @@ static int queue_setup(struct vb2_queue *q, *nbuffers = saa7134_buffer_count(size, *nbuffers); *nplanes = 1; sizes[0] = size; - alloc_ctxs[0] = dev->alloc_ctx; return 0; } diff --git a/drivers/media/pci/saa7134/saa7134-video.c b/drivers/media/pci/saa7134/saa7134-video.c index ffa39543eb65..8a6ebd087889 100644 --- a/drivers/media/pci/saa7134/saa7134-video.c +++ b/drivers/media/pci/saa7134/saa7134-video.c @@ -963,7 +963,7 @@ static int buffer_prepare(struct vb2_buffer *vb2) static int queue_setup(struct vb2_queue *q, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct saa7134_dmaqueue *dmaq = q->drv_priv; struct saa7134_dev *dev = dmaq->dev; @@ -980,7 +980,6 @@ static int queue_setup(struct vb2_queue *q, *nbuffers = saa7134_buffer_count(size, *nbuffers); *nplanes = 1; sizes[0] = size; - alloc_ctxs[0] = dev->alloc_ctx; saa7134_enable_analog_tuner(dev); @@ -2173,6 +2172,7 @@ int saa7134_video_init1(struct saa7134_dev *dev) q->buf_struct_size = sizeof(struct saa7134_buf); q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; ret = vb2_queue_init(q); if (ret) return ret; @@ -2191,6 +2191,7 @@ int saa7134_video_init1(struct saa7134_dev *dev) q->buf_struct_size = sizeof(struct saa7134_buf); q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; ret = vb2_queue_init(q); if (ret) return ret; diff --git a/drivers/media/pci/saa7134/saa7134.h b/drivers/media/pci/saa7134/saa7134.h index 69a9bbf22d4d..3849083526a7 100644 --- a/drivers/media/pci/saa7134/saa7134.h +++ b/drivers/media/pci/saa7134/saa7134.h @@ -610,7 +610,6 @@ struct saa7134_dev { /* video+ts+vbi capture */ - void *alloc_ctx; struct saa7134_dmaqueue video_q; struct vb2_queue video_vbq; struct saa7134_dmaqueue vbi_q; @@ -854,7 +853,7 @@ int saa7134_ts_buffer_init(struct vb2_buffer *vb2); int saa7134_ts_buffer_prepare(struct vb2_buffer *vb2); int saa7134_ts_queue_setup(struct vb2_queue *q, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]); + unsigned int sizes[], struct device *alloc_devs[]); int saa7134_ts_start_streaming(struct vb2_queue *vq, unsigned int count); void saa7134_ts_stop_streaming(struct vb2_queue *vq); diff --git a/drivers/media/pci/saa7164/saa7164-encoder.c b/drivers/media/pci/saa7164/saa7164-encoder.c index 1b184c39ba97..32a353d162e7 100644 --- a/drivers/media/pci/saa7164/saa7164-encoder.c +++ b/drivers/media/pci/saa7164/saa7164-encoder.c @@ -1022,8 +1022,7 @@ int saa7164_encoder_register(struct saa7164_port *port) dprintk(DBGLVL_ENC, "%s()\n", __func__); - if (port->type != SAA7164_MPEG_ENCODER) - BUG(); + BUG_ON(port->type != SAA7164_MPEG_ENCODER); /* Sanity check that the PCI configuration space is active */ if (port->hwcfg.BARLocation == 0) { @@ -1151,8 +1150,7 @@ void saa7164_encoder_unregister(struct saa7164_port *port) dprintk(DBGLVL_ENC, "%s(port=%d)\n", __func__, port->nr); - if (port->type != 
SAA7164_MPEG_ENCODER) - BUG(); + BUG_ON(port->type != SAA7164_MPEG_ENCODER); if (port->v4l_device) { if (port->v4l_device->minor != -1) diff --git a/drivers/media/pci/saa7164/saa7164.h b/drivers/media/pci/saa7164/saa7164.h index 8337524bfb8c..97411b0384c1 100644 --- a/drivers/media/pci/saa7164/saa7164.h +++ b/drivers/media/pci/saa7164/saa7164.h @@ -263,10 +263,6 @@ struct saa7164_i2c { u32 i2c_rc; }; -struct saa7164_ctrl { - struct v4l2_queryctrl v; -}; - struct saa7164_tvnorm { char *name; v4l2_std_id id; diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c index 67a14c41c227..399164314c28 100644 --- a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c +++ b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c @@ -33,7 +33,7 @@ #include "solo6x10-jpeg.h" #define MIN_VID_BUFFERS 2 -#define FRAME_BUF_SIZE (196 * 1024) +#define FRAME_BUF_SIZE (400 * 1024) #define MP4_QS 16 #define DMA_ALIGN 4096 @@ -664,12 +664,9 @@ static int solo_ring_thread(void *data) static int solo_enc_queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, unsigned int sizes[], - void *alloc_ctxs[]) + struct device *alloc_devs[]) { - struct solo_enc_dev *solo_enc = vb2_get_drv_priv(q); - sizes[0] = FRAME_BUF_SIZE; - alloc_ctxs[0] = solo_enc->alloc_ctx; *num_planes = 1; if (*num_buffers < MIN_VID_BUFFERS) @@ -1239,11 +1236,6 @@ static struct solo_enc_dev *solo_enc_alloc(struct solo_dev *solo_dev, return ERR_PTR(-ENOMEM); hdl = &solo_enc->hdl; - solo_enc->alloc_ctx = vb2_dma_sg_init_ctx(&solo_dev->pdev->dev); - if (IS_ERR(solo_enc->alloc_ctx)) { - ret = PTR_ERR(solo_enc->alloc_ctx); - goto hdl_free; - } v4l2_ctrl_handler_init(hdl, 10); v4l2_ctrl_new_std(hdl, &solo_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); @@ -1299,6 +1291,7 @@ static struct solo_enc_dev *solo_enc_alloc(struct solo_dev *solo_dev, solo_enc->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; solo_enc->vidq.buf_struct_size = sizeof(struct solo_vb2_buf); solo_enc->vidq.lock = &solo_enc->lock; + solo_enc->vidq.dev = &solo_dev->pdev->dev; ret = vb2_queue_init(&solo_enc->vidq); if (ret) goto hdl_free; @@ -1347,7 +1340,6 @@ pci_free: solo_enc->desc_items, solo_enc->desc_dma); hdl_free: v4l2_ctrl_handler_free(hdl); - vb2_dma_sg_cleanup_ctx(solo_enc->alloc_ctx); kfree(solo_enc); return ERR_PTR(ret); } @@ -1362,7 +1354,6 @@ static void solo_enc_free(struct solo_enc_dev *solo_enc) solo_enc->desc_items, solo_enc->desc_dma); video_unregister_device(solo_enc->vfd); v4l2_ctrl_handler_free(&solo_enc->hdl); - vb2_dma_sg_cleanup_ctx(solo_enc->alloc_ctx); kfree(solo_enc); } diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2.c b/drivers/media/pci/solo6x10/solo6x10-v4l2.c index 721ff5320de7..b4be47969b6b 100644 --- a/drivers/media/pci/solo6x10/solo6x10-v4l2.c +++ b/drivers/media/pci/solo6x10/solo6x10-v4l2.c @@ -315,12 +315,11 @@ static void solo_stop_thread(struct solo_dev *solo_dev) static int solo_queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct solo_dev *solo_dev = vb2_get_drv_priv(q); sizes[0] = solo_image_size(solo_dev); - alloc_ctxs[0] = solo_dev->alloc_ctx; *num_planes = 1; if (*num_buffers < MIN_VID_BUFFERS) @@ -386,26 +385,24 @@ static int solo_querycap(struct file *file, void *priv, static int solo_enum_ext_input(struct solo_dev *solo_dev, struct v4l2_input *input) { - static const char * const dispnames_1[] = { "4UP" }; - static const 
char * const dispnames_2[] = { "4UP-1", "4UP-2" }; - static const char * const dispnames_5[] = { - "4UP-1", "4UP-2", "4UP-3", "4UP-4", "16UP" - }; - const char * const *dispnames; + int ext = input->index - solo_dev->nr_chans; + unsigned int nup, first; - if (input->index >= (solo_dev->nr_chans + solo_dev->nr_ext)) + if (ext >= solo_dev->nr_ext) return -EINVAL; - if (solo_dev->nr_ext == 5) - dispnames = dispnames_5; - else if (solo_dev->nr_ext == 2) - dispnames = dispnames_2; - else - dispnames = dispnames_1; - - snprintf(input->name, sizeof(input->name), "Multi %s", - dispnames[input->index - solo_dev->nr_chans]); - + nup = (ext == 4) ? 16 : 4; + first = (ext & 3) << 2; /* first channel in the n-up */ + snprintf(input->name, sizeof(input->name), + "Multi %d-up (cameras %d-%d)", + nup, first + 1, first + nup); + /* Possible outputs: + * Multi 4-up (cameras 1-4) + * Multi 4-up (cameras 5-8) + * Multi 4-up (cameras 9-12) + * Multi 4-up (cameras 13-16) + * Multi 16-up (cameras 1-16) + */ return 0; } @@ -681,16 +678,11 @@ int solo_v4l2_init(struct solo_dev *solo_dev, unsigned nr) solo_dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM; solo_dev->vidq.buf_struct_size = sizeof(struct solo_vb2_buf); solo_dev->vidq.lock = &solo_dev->lock; + solo_dev->vidq.dev = &solo_dev->pdev->dev; ret = vb2_queue_init(&solo_dev->vidq); if (ret < 0) goto fail; - solo_dev->alloc_ctx = vb2_dma_contig_init_ctx(&solo_dev->pdev->dev); - if (IS_ERR(solo_dev->alloc_ctx)) { - dev_err(&solo_dev->pdev->dev, "Can't allocate buffer context"); - return PTR_ERR(solo_dev->alloc_ctx); - } - /* Cycle all the channels and clear */ for (i = 0; i < solo_dev->nr_chans; i++) { solo_v4l2_set_ch(solo_dev, i); @@ -718,7 +710,6 @@ int solo_v4l2_init(struct solo_dev *solo_dev, unsigned nr) fail: video_device_release(solo_dev->vfd); - vb2_dma_contig_cleanup_ctx(solo_dev->alloc_ctx); v4l2_ctrl_handler_free(&solo_dev->disp_hdl); solo_dev->vfd = NULL; return ret; @@ -730,7 +721,6 @@ void solo_v4l2_exit(struct solo_dev *solo_dev) return; video_unregister_device(solo_dev->vfd); - vb2_dma_contig_cleanup_ctx(solo_dev->alloc_ctx); v4l2_ctrl_handler_free(&solo_dev->disp_hdl); solo_dev->vfd = NULL; } diff --git a/drivers/media/pci/solo6x10/solo6x10.h b/drivers/media/pci/solo6x10/solo6x10.h index 4ab6586c0467..5bd498735a66 100644 --- a/drivers/media/pci/solo6x10/solo6x10.h +++ b/drivers/media/pci/solo6x10/solo6x10.h @@ -178,7 +178,6 @@ struct solo_enc_dev { u32 sequence; struct vb2_queue vidq; struct list_head vidq_active; - void *alloc_ctx; int desc_count; int desc_nelts; struct solo_p2m_desc *desc_items; @@ -269,7 +268,6 @@ struct solo_dev { /* Buffer handling */ struct vb2_queue vidq; - struct vb2_alloc_ctx *alloc_ctx; u32 sequence; struct task_struct *kthread; struct mutex lock; diff --git a/drivers/media/pci/sta2x11/sta2x11_vip.c b/drivers/media/pci/sta2x11/sta2x11_vip.c index 1fc195f89686..aeb2b4e2db35 100644 --- a/drivers/media/pci/sta2x11/sta2x11_vip.c +++ b/drivers/media/pci/sta2x11/sta2x11_vip.c @@ -111,7 +111,6 @@ static inline struct vip_buffer *to_vip_buffer(struct vb2_v4l2_buffer *vb2) * @input: input line for video signal ( 0 or 1 ) * @disabled: Device is in power down state * @slock: for excluse acces of registers - * @alloc_ctx: context for videobuf2 * @vb_vidq: queue maintained by videobuf2 layer * @buffer_list: list of buffer in use * @sequence: sequence number of acquired buffer @@ -141,7 +140,6 @@ struct sta2x11_vip { int disabled; spinlock_t slock; - struct vb2_alloc_ctx *alloc_ctx; struct vb2_queue vb_vidq; struct 
list_head buffer_list; unsigned int sequence; @@ -267,7 +265,7 @@ static void vip_active_buf_next(struct sta2x11_vip *vip) /* Videobuf2 Operations */ static int queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct sta2x11_vip *vip = vb2_get_drv_priv(vq); @@ -276,7 +274,6 @@ static int queue_setup(struct vb2_queue *vq, *nplanes = 1; sizes[0] = vip->format.sizeimage; - alloc_ctxs[0] = vip->alloc_ctx; vip->sequence = 0; vip->active = NULL; @@ -861,25 +858,15 @@ static int sta2x11_vip_init_buffer(struct sta2x11_vip *vip) vip->vb_vidq.ops = &vip_video_qops; vip->vb_vidq.mem_ops = &vb2_dma_contig_memops; vip->vb_vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; + vip->vb_vidq.dev = &vip->pdev->dev; err = vb2_queue_init(&vip->vb_vidq); if (err) return err; INIT_LIST_HEAD(&vip->buffer_list); spin_lock_init(&vip->lock); - - - vip->alloc_ctx = vb2_dma_contig_init_ctx(&vip->pdev->dev); - if (IS_ERR(vip->alloc_ctx)) { - v4l2_err(&vip->v4l2_dev, "Can't allocate buffer context"); - return PTR_ERR(vip->alloc_ctx); - } - return 0; } -static void sta2x11_vip_release_buffer(struct sta2x11_vip *vip) -{ - vb2_dma_contig_cleanup_ctx(vip->alloc_ctx); -} + static int sta2x11_vip_init_controls(struct sta2x11_vip *vip) { /* @@ -1120,7 +1107,6 @@ vrelease: video_unregister_device(&vip->video_dev); free_irq(pdev->irq, vip); release_buf: - sta2x11_vip_release_buffer(vip); pci_disable_msi(pdev); unmap: vb2_queue_release(&vip->vb_vidq); diff --git a/drivers/media/pci/tw68/tw68-core.c b/drivers/media/pci/tw68/tw68-core.c index 4e77618fbb2b..8474528be91e 100644 --- a/drivers/media/pci/tw68/tw68-core.c +++ b/drivers/media/pci/tw68/tw68-core.c @@ -305,19 +305,13 @@ static int tw68_initdev(struct pci_dev *pci_dev, /* Then do any initialisation wanted before interrupts are on */ tw68_hw_init1(dev); - dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev); - if (IS_ERR(dev->alloc_ctx)) { - err = PTR_ERR(dev->alloc_ctx); - goto fail3; - } - /* get irq */ err = devm_request_irq(&pci_dev->dev, pci_dev->irq, tw68_irq, IRQF_SHARED, dev->name, dev); if (err < 0) { pr_err("%s: can't get IRQ %d\n", dev->name, pci_dev->irq); - goto fail4; + goto fail3; } /* @@ -331,7 +325,7 @@ static int tw68_initdev(struct pci_dev *pci_dev, if (err < 0) { pr_err("%s: can't register video device\n", dev->name); - goto fail5; + goto fail4; } tw_setl(TW68_INTMASK, dev->pci_irqmask); @@ -340,10 +334,8 @@ static int tw68_initdev(struct pci_dev *pci_dev, return 0; -fail5: - video_unregister_device(&dev->vdev); fail4: - vb2_dma_sg_cleanup_ctx(dev->alloc_ctx); + video_unregister_device(&dev->vdev); fail3: iounmap(dev->lmmio); fail2: @@ -367,7 +359,6 @@ static void tw68_finidev(struct pci_dev *pci_dev) /* unregister */ video_unregister_device(&dev->vdev); v4l2_ctrl_handler_free(&dev->hdl); - vb2_dma_sg_cleanup_ctx(dev->alloc_ctx); /* release resources */ iounmap(dev->lmmio); diff --git a/drivers/media/pci/tw68/tw68-video.c b/drivers/media/pci/tw68/tw68-video.c index 07116a87a57b..5e8212845c87 100644 --- a/drivers/media/pci/tw68/tw68-video.c +++ b/drivers/media/pci/tw68/tw68-video.c @@ -378,7 +378,7 @@ static int tw68_buffer_count(unsigned int size, unsigned int count) static int tw68_queue_setup(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct tw68_dev *dev = vb2_get_drv_priv(q); unsigned 
tot_bufs = q->num_buffers + *num_buffers; @@ -388,7 +388,6 @@ static int tw68_queue_setup(struct vb2_queue *q, tot_bufs = 2; tot_bufs = tw68_buffer_count(size, tot_bufs); *num_buffers = tot_bufs - q->num_buffers; - alloc_ctxs[0] = dev->alloc_ctx; /* * We allow create_bufs, but only if the sizeimage is >= as the * current sizeimage. The tw68_buffer_count calculation becomes quite @@ -983,6 +982,7 @@ int tw68_video_init2(struct tw68_dev *dev, int video_nr) dev->vidq.buf_struct_size = sizeof(struct tw68_buf); dev->vidq.lock = &dev->lock; dev->vidq.min_buffers_needed = 2; + dev->vidq.dev = &dev->pci->dev; ret = vb2_queue_init(&dev->vidq); if (ret) return ret; diff --git a/drivers/media/pci/tw68/tw68.h b/drivers/media/pci/tw68/tw68.h index 6c7dcb300f34..5585c7ee23f2 100644 --- a/drivers/media/pci/tw68/tw68.h +++ b/drivers/media/pci/tw68/tw68.h @@ -165,7 +165,6 @@ struct tw68_dev { unsigned field; struct vb2_queue vidq; struct list_head active; - void *alloc_ctx; /* various v4l controls */ const struct tw68_tvnorm *tvnorm; /* video */ diff --git a/drivers/media/pci/tw686x/Kconfig b/drivers/media/pci/tw686x/Kconfig index fb8536974052..34ff37712313 100644 --- a/drivers/media/pci/tw686x/Kconfig +++ b/drivers/media/pci/tw686x/Kconfig @@ -3,6 +3,8 @@ config VIDEO_TW686X depends on PCI && VIDEO_DEV && VIDEO_V4L2 && SND depends on HAS_DMA select VIDEOBUF2_VMALLOC + select VIDEOBUF2_DMA_CONTIG + select VIDEOBUF2_DMA_SG select SND_PCM help Support for Intersil/Techwell TW686x-based frame grabber cards. diff --git a/drivers/media/pci/tw686x/tw686x-audio.c b/drivers/media/pci/tw686x/tw686x-audio.c index 91459ab715b2..96e444c49173 100644 --- a/drivers/media/pci/tw686x/tw686x-audio.c +++ b/drivers/media/pci/tw686x/tw686x-audio.c @@ -62,12 +62,22 @@ void tw686x_audio_irq(struct tw686x_dev *dev, unsigned long requests, } spin_unlock_irqrestore(&ac->lock, flags); + if (!done || !next) + continue; + /* + * Checking for a non-nil dma_desc[pb]->virt buffer is + * the same as checking for memcpy DMA mode. + */ desc = &ac->dma_descs[pb]; - if (done && next && desc->virt) { - memcpy(done->virt, desc->virt, desc->size); - ac->ptr = done->dma - ac->buf[0].dma; - snd_pcm_period_elapsed(ac->ss); + if (desc->virt) { + memcpy(done->virt, desc->virt, + dev->period_size); + } else { + u32 reg = pb ? ADMA_B_ADDR[ch] : ADMA_P_ADDR[ch]; + reg_write(dev, reg, next->dma); } + ac->ptr = done->dma - ac->buf[0].dma; + snd_pcm_period_elapsed(ac->ss); } } @@ -83,10 +93,9 @@ static int tw686x_pcm_hw_free(struct snd_pcm_substream *ss) } /* - * The audio device rate is global and shared among all - * capture channels. The driver makes no effort to prevent - * rate modifications. User is free change the rate, but it - * means changing the rate for all capture sub-devices. + * Audio parameters are global and shared among all + * capture channels. The driver prevents changes to + * the parameters if any audio channel is capturing. 
*/ static const struct snd_pcm_hardware tw686x_capture_hw = { .info = (SNDRV_PCM_INFO_MMAP | @@ -99,9 +108,9 @@ static const struct snd_pcm_hardware tw686x_capture_hw = { .rate_max = 48000, .channels_min = 1, .channels_max = 1, - .buffer_bytes_max = TW686X_AUDIO_PAGE_MAX * TW686X_AUDIO_PAGE_SZ, - .period_bytes_min = TW686X_AUDIO_PAGE_SZ, - .period_bytes_max = TW686X_AUDIO_PAGE_SZ, + .buffer_bytes_max = TW686X_AUDIO_PAGE_MAX * AUDIO_DMA_SIZE_MAX, + .period_bytes_min = AUDIO_DMA_SIZE_MIN, + .period_bytes_max = AUDIO_DMA_SIZE_MAX, .periods_min = TW686X_AUDIO_PERIODS_MIN, .periods_max = TW686X_AUDIO_PERIODS_MAX, }; @@ -143,6 +152,14 @@ static int tw686x_pcm_prepare(struct snd_pcm_substream *ss) int i; spin_lock_irqsave(&dev->lock, flags); + /* + * Given the audio parameters are global (i.e. shared across + * DMA channels), we need to check new params are allowed. + */ + if (((dev->audio_rate != rt->rate) || + (dev->period_size != period_size)) && dev->audio_enabled) + goto err_audio_busy; + tw686x_disable_channel(dev, AUDIO_CHANNEL_OFFSET + ac->ch); spin_unlock_irqrestore(&dev->lock, flags); @@ -156,12 +173,21 @@ static int tw686x_pcm_prepare(struct snd_pcm_substream *ss) reg_write(dev, AUDIO_CONTROL2, reg); } - if (period_size != TW686X_AUDIO_PAGE_SZ || - rt->periods < TW686X_AUDIO_PERIODS_MIN || - rt->periods > TW686X_AUDIO_PERIODS_MAX) { - return -EINVAL; + if (dev->period_size != period_size) { + u32 reg; + + dev->period_size = period_size; + reg = reg_read(dev, AUDIO_CONTROL1); + reg &= ~(AUDIO_DMA_SIZE_MASK << AUDIO_DMA_SIZE_SHIFT); + reg |= period_size << AUDIO_DMA_SIZE_SHIFT; + + reg_write(dev, AUDIO_CONTROL1, reg); } + if (rt->periods < TW686X_AUDIO_PERIODS_MIN || + rt->periods > TW686X_AUDIO_PERIODS_MAX) + return -EINVAL; + spin_lock_irqsave(&ac->lock, flags); INIT_LIST_HEAD(&ac->buf_list); @@ -181,9 +207,19 @@ static int tw686x_pcm_prepare(struct snd_pcm_substream *ss) ac->curr_bufs[0] = p_buf; ac->curr_bufs[1] = b_buf; ac->ptr = 0; + + if (dev->dma_mode != TW686X_DMA_MODE_MEMCPY) { + reg_write(dev, ADMA_P_ADDR[ac->ch], p_buf->dma); + reg_write(dev, ADMA_B_ADDR[ac->ch], b_buf->dma); + } + spin_unlock_irqrestore(&ac->lock, flags); return 0; + +err_audio_busy: + spin_unlock_irqrestore(&dev->lock, flags); + return -EBUSY; } static int tw686x_pcm_trigger(struct snd_pcm_substream *ss, int cmd) @@ -197,6 +233,7 @@ static int tw686x_pcm_trigger(struct snd_pcm_substream *ss, int cmd) case SNDRV_PCM_TRIGGER_START: if (ac->curr_bufs[0] && ac->curr_bufs[1]) { spin_lock_irqsave(&dev->lock, flags); + dev->audio_enabled = 1; tw686x_enable_channel(dev, AUDIO_CHANNEL_OFFSET + ac->ch); spin_unlock_irqrestore(&dev->lock, flags); @@ -209,6 +246,7 @@ static int tw686x_pcm_trigger(struct snd_pcm_substream *ss, int cmd) break; case SNDRV_PCM_TRIGGER_STOP: spin_lock_irqsave(&dev->lock, flags); + dev->audio_enabled = 0; tw686x_disable_channel(dev, AUDIO_CHANNEL_OFFSET + ac->ch); spin_unlock_irqrestore(&dev->lock, flags); @@ -266,8 +304,8 @@ static int tw686x_snd_pcm_init(struct tw686x_dev *dev) return snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(dev->pci_dev), - TW686X_AUDIO_PAGE_MAX * TW686X_AUDIO_PAGE_SZ, - TW686X_AUDIO_PAGE_MAX * TW686X_AUDIO_PAGE_SZ); + TW686X_AUDIO_PAGE_MAX * AUDIO_DMA_SIZE_MAX, + TW686X_AUDIO_PAGE_MAX * AUDIO_DMA_SIZE_MAX); } static void tw686x_audio_dma_free(struct tw686x_dev *dev, @@ -290,11 +328,19 @@ static int tw686x_audio_dma_alloc(struct tw686x_dev *dev, { int pb; + /* + * In the memcpy DMA mode we allocate a consistent buffer + * and use it 
for the DMA capture. Otherwise, DMA + * acts on the ALSA buffers as received in pcm_prepare. + */ + if (dev->dma_mode != TW686X_DMA_MODE_MEMCPY) + return 0; + for (pb = 0; pb < 2; pb++) { u32 reg = pb ? ADMA_B_ADDR[ac->ch] : ADMA_P_ADDR[ac->ch]; void *virt; - virt = pci_alloc_consistent(dev->pci_dev, TW686X_AUDIO_PAGE_SZ, + virt = pci_alloc_consistent(dev->pci_dev, AUDIO_DMA_SIZE_MAX, &ac->dma_descs[pb].phys); if (!virt) { dev_err(&dev->pci_dev->dev, @@ -303,7 +349,7 @@ static int tw686x_audio_dma_alloc(struct tw686x_dev *dev, return -ENOMEM; } ac->dma_descs[pb].virt = virt; - ac->dma_descs[pb].size = TW686X_AUDIO_PAGE_SZ; + ac->dma_descs[pb].size = AUDIO_DMA_SIZE_MAX; reg_write(dev, reg, ac->dma_descs[pb].phys); } return 0; @@ -334,12 +380,8 @@ int tw686x_audio_init(struct tw686x_dev *dev) struct snd_card *card; int err, ch; - /* - * AUDIO_CONTROL1 - * DMA byte length [31:19] = 4096 (i.e. ALSA period) - * External audio enable [0] = enabled - */ - reg_write(dev, AUDIO_CONTROL1, 0x80000001); + /* Enable external audio */ + reg_write(dev, AUDIO_CONTROL1, BIT(0)); err = snd_card_new(&pci_dev->dev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1, diff --git a/drivers/media/pci/tw686x/tw686x-core.c b/drivers/media/pci/tw686x/tw686x-core.c index cf53b0e97be2..71a0453b1af1 100644 --- a/drivers/media/pci/tw686x/tw686x-core.c +++ b/drivers/media/pci/tw686x/tw686x-core.c @@ -21,12 +21,14 @@ * under stress testings it has been found that the machine can * freeze completely if DMA registers are programmed while streaming * is active. - * This driver tries to access hardware registers as infrequently - * as possible by: - * i. allocating fixed DMA buffers and memcpy'ing into - * vmalloc'ed buffers - * ii. using a timer to mitigate the rate of DMA reset operations, - * on DMA channels error. + * + * Therefore, driver implements a dma_mode called 'memcpy' which + * avoids cycling the DMA buffers, and insteads allocates extra DMA buffers + * and then copies into vmalloc'ed user buffers. + * + * In addition to this, when streaming is on, the driver tries to access + * hardware registers as infrequently as possible. This is done by using + * a timer to limit the rate at which DMA is reset on DMA channels error. 
*/ #include @@ -55,6 +57,42 @@ static u32 dma_interval = 0x00098968; module_param(dma_interval, int, 0444); MODULE_PARM_DESC(dma_interval, "Minimum time span for DMA interrupting host"); +static unsigned int dma_mode = TW686X_DMA_MODE_MEMCPY; +static const char *dma_mode_name(unsigned int mode) +{ + switch (mode) { + case TW686X_DMA_MODE_MEMCPY: + return "memcpy"; + case TW686X_DMA_MODE_CONTIG: + return "contig"; + case TW686X_DMA_MODE_SG: + return "sg"; + default: + return "unknown"; + } +} + +static int tw686x_dma_mode_get(char *buffer, struct kernel_param *kp) +{ + return sprintf(buffer, dma_mode_name(dma_mode)); +} + +static int tw686x_dma_mode_set(const char *val, struct kernel_param *kp) +{ + if (!strcasecmp(val, dma_mode_name(TW686X_DMA_MODE_MEMCPY))) + dma_mode = TW686X_DMA_MODE_MEMCPY; + else if (!strcasecmp(val, dma_mode_name(TW686X_DMA_MODE_CONTIG))) + dma_mode = TW686X_DMA_MODE_CONTIG; + else if (!strcasecmp(val, dma_mode_name(TW686X_DMA_MODE_SG))) + dma_mode = TW686X_DMA_MODE_SG; + else + return -EINVAL; + return 0; +} +module_param_call(dma_mode, tw686x_dma_mode_set, tw686x_dma_mode_get, + &dma_mode, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(dma_mode, "DMA operation mode (memcpy/contig/sg, default=memcpy)"); + void tw686x_disable_channel(struct tw686x_dev *dev, unsigned int channel) { u32 dma_en = reg_read(dev, DMA_CHANNEL_ENABLE); @@ -212,6 +250,7 @@ static int tw686x_probe(struct pci_dev *pci_dev, if (!dev) return -ENOMEM; dev->type = pci_id->driver_data; + dev->dma_mode = dma_mode; sprintf(dev->name, "tw%04X", pci_dev->device); dev->video_channels = kcalloc(max_channels(dev), @@ -228,9 +267,10 @@ static int tw686x_probe(struct pci_dev *pci_dev, goto free_video; } - pr_info("%s: PCI %s, IRQ %d, MMIO 0x%lx\n", dev->name, + pr_info("%s: PCI %s, IRQ %d, MMIO 0x%lx (%s mode)\n", dev->name, pci_name(pci_dev), pci_dev->irq, - (unsigned long)pci_resource_start(pci_dev, 0)); + (unsigned long)pci_resource_start(pci_dev, 0), + dma_mode_name(dma_mode)); dev->pci_dev = pci_dev; if (pci_enable_device(pci_dev)) { diff --git a/drivers/media/pci/tw686x/tw686x-regs.h b/drivers/media/pci/tw686x/tw686x-regs.h index fcef586a4c8c..15a956642ef4 100644 --- a/drivers/media/pci/tw686x/tw686x-regs.h +++ b/drivers/media/pci/tw686x/tw686x-regs.h @@ -105,6 +105,10 @@ 0x2d0, 0x2d1, 0x2d2, 0x2d3 }) #define SYS_MODE_DMA_SHIFT 13 +#define AUDIO_DMA_SIZE_SHIFT 19 +#define AUDIO_DMA_SIZE_MIN SZ_512 +#define AUDIO_DMA_SIZE_MAX SZ_4K +#define AUDIO_DMA_SIZE_MASK (SZ_8K - 1) #define DMA_CMD_ENABLE BIT(31) #define INT_STATUS_DMA_TOUT BIT(17) @@ -119,4 +123,9 @@ #define TW686X_STD_PAL_CN 5 #define TW686X_STD_PAL_60 6 +#define TW686X_FIELD_MODE 0x3 +#define TW686X_FRAME_MODE 0x2 +/* 0x1 is reserved */ +#define TW686X_SG_MODE 0x0 + #define TW686X_FIFO_ERROR(x) (x & ~(0xff)) diff --git a/drivers/media/pci/tw686x/tw686x-video.c b/drivers/media/pci/tw686x/tw686x-video.c index 253e10823ba3..cdb16de770fe 100644 --- a/drivers/media/pci/tw686x/tw686x-video.c +++ b/drivers/media/pci/tw686x/tw686x-video.c @@ -19,6 +19,8 @@ #include #include #include +#include <media/videobuf2-dma-contig.h> +#include <media/videobuf2-dma-sg.h> #include #include "tw686x.h" #include "tw686x-regs.h" @@ -26,6 +28,11 @@ #define TW686X_INPUTS_PER_CH 4 #define TW686X_VIDEO_WIDTH 720 #define TW686X_VIDEO_HEIGHT(id) ((id & V4L2_STD_525_60) ? 480 : 576) +#define TW686X_MAX_FPS(id) ((id & V4L2_STD_525_60) ? 
30 : 25) + +#define TW686X_MAX_SG_ENTRY_SIZE 4096 +#define TW686X_MAX_SG_DESC_COUNT 256 /* PAL 720x576 needs 203 4-KB pages */ +#define TW686X_SG_TABLE_SIZE (TW686X_MAX_SG_DESC_COUNT * sizeof(struct tw686x_sg_desc)) static const struct tw686x_format formats[] = { { @@ -43,53 +50,367 @@ static const struct tw686x_format formats[] = { } }; -static unsigned int tw686x_fields_map(v4l2_std_id std, unsigned int fps) +static void tw686x_buf_done(struct tw686x_video_channel *vc, + unsigned int pb) { - static const unsigned int map[15] = { - 0x00000000, 0x00000001, 0x00004001, 0x00104001, 0x00404041, - 0x01041041, 0x01104411, 0x01111111, 0x04444445, 0x04511445, - 0x05145145, 0x05151515, 0x05515455, 0x05551555, 0x05555555 - }; + struct tw686x_dma_desc *desc = &vc->dma_descs[pb]; + struct tw686x_dev *dev = vc->dev; + struct vb2_v4l2_buffer *vb; + struct vb2_buffer *vb2_buf; - static const unsigned int std_625_50[26] = { - 0, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6, 7, 7, - 8, 8, 9, 10, 10, 11, 11, 12, 13, 13, 14, 14, 0 - }; + if (vc->curr_bufs[pb]) { + vb = &vc->curr_bufs[pb]->vb; - static const unsigned int std_525_60[31] = { - 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, - 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 0, 0 - }; + vb->field = dev->dma_ops->field; + vb->sequence = vc->sequence++; + vb2_buf = &vb->vb2_buf; - unsigned int i; + if (dev->dma_mode == TW686X_DMA_MODE_MEMCPY) + memcpy(vb2_plane_vaddr(vb2_buf, 0), desc->virt, + desc->size); + vb2_buf->timestamp = ktime_get_ns(); + vb2_buffer_done(vb2_buf, VB2_BUF_STATE_DONE); + } + + vc->pb = !pb; +} + +/* + * We can call this even when alloc_dma failed for the given channel + */ +static void tw686x_memcpy_dma_free(struct tw686x_video_channel *vc, + unsigned int pb) +{ + struct tw686x_dma_desc *desc = &vc->dma_descs[pb]; + struct tw686x_dev *dev = vc->dev; + struct pci_dev *pci_dev; + unsigned long flags; + + /* Check device presence. Shouldn't really happen! */ + spin_lock_irqsave(&dev->lock, flags); + pci_dev = dev->pci_dev; + spin_unlock_irqrestore(&dev->lock, flags); + if (!pci_dev) { + WARN(1, "trying to deallocate on missing device\n"); + return; + } + + if (desc->virt) { + pci_free_consistent(dev->pci_dev, desc->size, + desc->virt, desc->phys); + desc->virt = NULL; + } +} + +static int tw686x_memcpy_dma_alloc(struct tw686x_video_channel *vc, + unsigned int pb) +{ + struct tw686x_dev *dev = vc->dev; + u32 reg = pb ? VDMA_B_ADDR[vc->ch] : VDMA_P_ADDR[vc->ch]; + unsigned int len; + void *virt; + + WARN(vc->dma_descs[pb].virt, + "Allocating buffer but previous still here\n"); + + len = (vc->width * vc->height * vc->format->depth) >> 3; + virt = pci_alloc_consistent(dev->pci_dev, len, + &vc->dma_descs[pb].phys); + if (!virt) { + v4l2_err(&dev->v4l2_dev, + "dma%d: unable to allocate %s-buffer\n", + vc->ch, pb ? 
"B" : "P"); + return -ENOMEM; + } + vc->dma_descs[pb].size = len; + vc->dma_descs[pb].virt = virt; + reg_write(dev, reg, vc->dma_descs[pb].phys); + + return 0; +} + +static void tw686x_memcpy_buf_refill(struct tw686x_video_channel *vc, + unsigned int pb) +{ + struct tw686x_v4l2_buf *buf; + + while (!list_empty(&vc->vidq_queued)) { + + buf = list_first_entry(&vc->vidq_queued, + struct tw686x_v4l2_buf, list); + list_del(&buf->list); + + vc->curr_bufs[pb] = buf; + return; + } + vc->curr_bufs[pb] = NULL; +} - if (std & V4L2_STD_525_60) { - if (fps >= ARRAY_SIZE(std_525_60)) - fps = 30; - i = std_525_60[fps]; +static const struct tw686x_dma_ops memcpy_dma_ops = { + .alloc = tw686x_memcpy_dma_alloc, + .free = tw686x_memcpy_dma_free, + .buf_refill = tw686x_memcpy_buf_refill, + .mem_ops = &vb2_vmalloc_memops, + .hw_dma_mode = TW686X_FRAME_MODE, + .field = V4L2_FIELD_INTERLACED, +}; + +static void tw686x_contig_buf_refill(struct tw686x_video_channel *vc, + unsigned int pb) +{ + struct tw686x_v4l2_buf *buf; + + while (!list_empty(&vc->vidq_queued)) { + u32 reg = pb ? VDMA_B_ADDR[vc->ch] : VDMA_P_ADDR[vc->ch]; + dma_addr_t phys; + + buf = list_first_entry(&vc->vidq_queued, + struct tw686x_v4l2_buf, list); + list_del(&buf->list); + + phys = vb2_dma_contig_plane_dma_addr(&buf->vb.vb2_buf, 0); + reg_write(vc->dev, reg, phys); + + buf->vb.vb2_buf.state = VB2_BUF_STATE_ACTIVE; + vc->curr_bufs[pb] = buf; + return; + } + vc->curr_bufs[pb] = NULL; +} + +static const struct tw686x_dma_ops contig_dma_ops = { + .buf_refill = tw686x_contig_buf_refill, + .mem_ops = &vb2_dma_contig_memops, + .hw_dma_mode = TW686X_FRAME_MODE, + .field = V4L2_FIELD_INTERLACED, +}; + +static int tw686x_sg_desc_fill(struct tw686x_sg_desc *descs, + struct tw686x_v4l2_buf *buf, + unsigned int buf_len) +{ + struct sg_table *vbuf = vb2_dma_sg_plane_desc(&buf->vb.vb2_buf, 0); + unsigned int len, entry_len; + struct scatterlist *sg; + int i, count; + + /* Clear the scatter-gather table */ + memset(descs, 0, TW686X_SG_TABLE_SIZE); + + count = 0; + for_each_sg(vbuf->sgl, sg, vbuf->nents, i) { + dma_addr_t phys = sg_dma_address(sg); + len = sg_dma_len(sg); + + while (len && buf_len) { + + if (count == TW686X_MAX_SG_DESC_COUNT) + return -ENOMEM; + + entry_len = min_t(unsigned int, len, + TW686X_MAX_SG_ENTRY_SIZE); + entry_len = min_t(unsigned int, entry_len, buf_len); + descs[count].phys = cpu_to_le32(phys); + descs[count++].flags_length = + cpu_to_le32(BIT(30) | entry_len); + phys += entry_len; + len -= entry_len; + buf_len -= entry_len; + } + + if (!buf_len) + return 0; + } + + return -ENOMEM; +} + +static void tw686x_sg_buf_refill(struct tw686x_video_channel *vc, + unsigned int pb) +{ + struct tw686x_dev *dev = vc->dev; + struct tw686x_v4l2_buf *buf; + + while (!list_empty(&vc->vidq_queued)) { + unsigned int buf_len; + + buf = list_first_entry(&vc->vidq_queued, + struct tw686x_v4l2_buf, list); + list_del(&buf->list); + + buf_len = (vc->width * vc->height * vc->format->depth) >> 3; + if (tw686x_sg_desc_fill(vc->sg_descs[pb], buf, buf_len)) { + v4l2_err(&dev->v4l2_dev, + "dma%d: unable to fill %s-buffer\n", + vc->ch, pb ? 
"B" : "P"); + vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); + continue; + } + + buf->vb.vb2_buf.state = VB2_BUF_STATE_ACTIVE; + vc->curr_bufs[pb] = buf; + return; + } + + vc->curr_bufs[pb] = NULL; +} + +static void tw686x_sg_dma_free(struct tw686x_video_channel *vc, + unsigned int pb) +{ + struct tw686x_dma_desc *desc = &vc->dma_descs[pb]; + struct tw686x_dev *dev = vc->dev; + + if (desc->size) { + pci_free_consistent(dev->pci_dev, desc->size, + desc->virt, desc->phys); + desc->virt = NULL; + } + + vc->sg_descs[pb] = NULL; +} + +static int tw686x_sg_dma_alloc(struct tw686x_video_channel *vc, + unsigned int pb) +{ + struct tw686x_dma_desc *desc = &vc->dma_descs[pb]; + struct tw686x_dev *dev = vc->dev; + u32 reg = pb ? DMA_PAGE_TABLE1_ADDR[vc->ch] : + DMA_PAGE_TABLE0_ADDR[vc->ch]; + void *virt; + + if (desc->size) { + + virt = pci_alloc_consistent(dev->pci_dev, desc->size, + &desc->phys); + if (!virt) { + v4l2_err(&dev->v4l2_dev, + "dma%d: unable to allocate %s-buffer\n", + vc->ch, pb ? "B" : "P"); + return -ENOMEM; + } + desc->virt = virt; + reg_write(dev, reg, desc->phys); } else { - if (fps >= ARRAY_SIZE(std_625_50)) - fps = 25; - i = std_625_50[fps]; + virt = dev->video_channels[0].dma_descs[pb].virt + + vc->ch * TW686X_SG_TABLE_SIZE; } - return map[i]; + vc->sg_descs[pb] = virt; + return 0; +} + +static int tw686x_sg_setup(struct tw686x_dev *dev) +{ + unsigned int sg_table_size, pb, ch, channels; + + if (is_second_gen(dev)) { + /* + * TW6865/TW6869: each channel needs a pair of + * P-B descriptor tables. + */ + channels = max_channels(dev); + sg_table_size = TW686X_SG_TABLE_SIZE; + } else { + /* + * TW6864/TW6868: we need to allocate a pair of + * P-B descriptor tables, common for all channels. + * Each table will be bigger than 4 KB. + */ + channels = 1; + sg_table_size = max_channels(dev) * TW686X_SG_TABLE_SIZE; + } + + for (ch = 0; ch < channels; ch++) { + struct tw686x_video_channel *vc = &dev->video_channels[ch]; + + for (pb = 0; pb < 2; pb++) + vc->dma_descs[pb].size = sg_table_size; + } + + return 0; +} + +static const struct tw686x_dma_ops sg_dma_ops = { + .setup = tw686x_sg_setup, + .alloc = tw686x_sg_dma_alloc, + .free = tw686x_sg_dma_free, + .buf_refill = tw686x_sg_buf_refill, + .mem_ops = &vb2_dma_sg_memops, + .hw_dma_mode = TW686X_SG_MODE, + .field = V4L2_FIELD_SEQ_TB, +}; + +static const unsigned int fps_map[15] = { + /* + * bit 31 enables selecting the field control register + * bits 0-29 are a bitmask with fields that will be output. + * For NTSC (and PAL-M, PAL-60), all 30 bits are used. + * For other PAL standards, only the first 25 bits are used. 
+ */
+	0x00000000,	/* output all fields */
+	0x80000006,	/* 2 fps (60Hz), 2 fps (50Hz) */
+	0x80018006,	/* 4 fps (60Hz), 4 fps (50Hz) */
+	0x80618006,	/* 6 fps (60Hz), 6 fps (50Hz) */
+	0x81818186,	/* 8 fps (60Hz), 8 fps (50Hz) */
+	0x86186186,	/* 10 fps (60Hz), 8 fps (50Hz) */
+	0x86619866,	/* 12 fps (60Hz), 10 fps (50Hz) */
+	0x86666666,	/* 14 fps (60Hz), 12 fps (50Hz) */
+	0x9999999e,	/* 16 fps (60Hz), 14 fps (50Hz) */
+	0x99e6799e,	/* 18 fps (60Hz), 16 fps (50Hz) */
+	0x9e79e79e,	/* 20 fps (60Hz), 16 fps (50Hz) */
+	0x9e7e7e7e,	/* 22 fps (60Hz), 18 fps (50Hz) */
+	0x9fe7f9fe,	/* 24 fps (60Hz), 20 fps (50Hz) */
+	0x9ffe7ffe,	/* 26 fps (60Hz), 22 fps (50Hz) */
+	0x9ffffffe,	/* 28 fps (60Hz), 24 fps (50Hz) */
+};
+
+static unsigned int tw686x_real_fps(unsigned int index, unsigned int max_fps)
+{
+	unsigned long mask;
+
+	if (!index || index >= ARRAY_SIZE(fps_map))
+		return max_fps;
+
+	mask = GENMASK(max_fps - 1, 0);
+	return hweight_long(fps_map[index] & mask);
+}
+
+static unsigned int tw686x_fps_idx(unsigned int fps, unsigned int max_fps)
+{
+	unsigned int idx, real_fps;
+	int delta;
+
+	/* First guess */
+	idx = (12 + 15 * fps) / max_fps;
+
+	/* The minimum selectable framerate is 2 frames per second */
+	if (!idx)
+		return 1;
+
+	/* If the real rate is off by more than 1 fps either way, adjust */
+	real_fps = tw686x_real_fps(idx, max_fps);
+	delta = real_fps - fps;
+	if (delta < -1)
+		idx++;
+	else if (delta > 1)
+		idx--;
+
+	/* Past the end of the table: index 0 outputs all fields (max rate) */
+	if (idx >= 15)
+		return 0;
+
+	return idx;
 }
 
 static void tw686x_set_framerate(struct tw686x_video_channel *vc,
 				 unsigned int fps)
 {
-	unsigned int map;
-
-	if (vc->fps == fps)
-		return;
+	unsigned int i;
 
-	map = tw686x_fields_map(vc->video_standard, fps) << 1;
-	map |= map << 1;
-	if (map > 0)
-		map |= BIT(31);
-	reg_write(vc->dev, VIDEO_FIELD_CTRL[vc->ch], map);
-	vc->fps = fps;
+	i = tw686x_fps_idx(fps, TW686X_MAX_FPS(vc->video_standard));
+	reg_write(vc->dev, VIDEO_FIELD_CTRL[vc->ch], fps_map[i]);
+	vc->fps = tw686x_real_fps(i, TW686X_MAX_FPS(vc->video_standard));
 }
 
 static const struct tw686x_format *format_by_fourcc(unsigned int fourcc)
@@ -104,7 +425,7 @@ static const struct tw686x_format *format_by_fourcc(unsigned int fourcc)
 
 static int tw686x_queue_setup(struct vb2_queue *vq,
 			      unsigned int *nbuffers, unsigned int *nplanes,
-			      unsigned int sizes[], void *alloc_ctxs[])
+			      unsigned int sizes[], struct device *alloc_devs[])
 {
 	struct tw686x_video_channel *vc = vb2_get_drv_priv(vq);
 	unsigned int szimage =
@@ -152,75 +473,6 @@ static void tw686x_buf_queue(struct vb2_buffer *vb)
 	spin_unlock_irqrestore(&vc->qlock, flags);
 }
 
-/*
- * We can call this even when alloc_dma failed for the given channel
- */
-static void tw686x_free_dma(struct tw686x_video_channel *vc, unsigned int pb)
-{
-	struct tw686x_dma_desc *desc = &vc->dma_descs[pb];
-	struct tw686x_dev *dev = vc->dev;
-	struct pci_dev *pci_dev;
-	unsigned long flags;
-
-	/* Check device presence. Shouldn't really happen! */
-	spin_lock_irqsave(&dev->lock, flags);
-	pci_dev = dev->pci_dev;
-	spin_unlock_irqrestore(&dev->lock, flags);
-	if (!pci_dev) {
-		WARN(1, "trying to deallocate on missing device\n");
-		return;
-	}
-
-	if (desc->virt) {
-		pci_free_consistent(dev->pci_dev, desc->size,
-				    desc->virt, desc->phys);
-		desc->virt = NULL;
-	}
-}
-
-static int tw686x_alloc_dma(struct tw686x_video_channel *vc, unsigned int pb)
-{
-	struct tw686x_dev *dev = vc->dev;
-	u32 reg = pb ?
VDMA_B_ADDR[vc->ch] : VDMA_P_ADDR[vc->ch]; - unsigned int len; - void *virt; - - WARN(vc->dma_descs[pb].virt, - "Allocating buffer but previous still here\n"); - - len = (vc->width * vc->height * vc->format->depth) >> 3; - virt = pci_alloc_consistent(dev->pci_dev, len, - &vc->dma_descs[pb].phys); - if (!virt) { - v4l2_err(&dev->v4l2_dev, - "dma%d: unable to allocate %s-buffer\n", - vc->ch, pb ? "B" : "P"); - return -ENOMEM; - } - vc->dma_descs[pb].size = len; - vc->dma_descs[pb].virt = virt; - reg_write(dev, reg, vc->dma_descs[pb].phys); - - return 0; -} - -static void tw686x_buffer_refill(struct tw686x_video_channel *vc, - unsigned int pb) -{ - struct tw686x_v4l2_buf *buf; - - while (!list_empty(&vc->vidq_queued)) { - - buf = list_first_entry(&vc->vidq_queued, - struct tw686x_v4l2_buf, list); - list_del(&buf->list); - - vc->curr_bufs[pb] = buf; - return; - } - vc->curr_bufs[pb] = NULL; -} - static void tw686x_clear_queue(struct tw686x_video_channel *vc, enum vb2_buffer_state state) { @@ -262,7 +514,8 @@ static int tw686x_start_streaming(struct vb2_queue *vq, unsigned int count) spin_lock_irqsave(&vc->qlock, flags); /* Sanity check */ - if (!vc->dma_descs[0].virt || !vc->dma_descs[1].virt) { + if (dev->dma_mode == TW686X_DMA_MODE_MEMCPY && + (!vc->dma_descs[0].virt || !vc->dma_descs[1].virt)) { spin_unlock_irqrestore(&vc->qlock, flags); v4l2_err(&dev->v4l2_dev, "video%d: refusing to start without DMA buffers\n", @@ -272,7 +525,7 @@ static int tw686x_start_streaming(struct vb2_queue *vq, unsigned int count) } for (pb = 0; pb < 2; pb++) - tw686x_buffer_refill(vc, pb); + dev->dma_ops->buf_refill(vc, pb); spin_unlock_irqrestore(&vc->qlock, flags); vc->sequence = 0; @@ -375,10 +628,11 @@ static int tw686x_g_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct tw686x_video_channel *vc = video_drvdata(file); + struct tw686x_dev *dev = vc->dev; f->fmt.pix.width = vc->width; f->fmt.pix.height = vc->height; - f->fmt.pix.field = V4L2_FIELD_INTERLACED; + f->fmt.pix.field = dev->dma_ops->field; f->fmt.pix.pixelformat = vc->format->fourcc; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; f->fmt.pix.bytesperline = (f->fmt.pix.width * vc->format->depth) / 8; @@ -390,6 +644,7 @@ static int tw686x_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct tw686x_video_channel *vc = video_drvdata(file); + struct tw686x_dev *dev = vc->dev; unsigned int video_height = TW686X_VIDEO_HEIGHT(vc->video_standard); const struct tw686x_format *format; @@ -412,7 +667,7 @@ static int tw686x_try_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.bytesperline = (f->fmt.pix.width * format->depth) / 8; f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; - f->fmt.pix.field = V4L2_FIELD_INTERLACED; + f->fmt.pix.field = dev->dma_ops->field; return 0; } @@ -421,6 +676,7 @@ static int tw686x_s_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct tw686x_video_channel *vc = video_drvdata(file); + struct tw686x_dev *dev = vc->dev; u32 val, width, line_width, height; unsigned long bitsperframe; int err, pb; @@ -438,15 +694,16 @@ static int tw686x_s_fmt_vid_cap(struct file *file, void *priv, vc->height = f->fmt.pix.height; /* We need new DMA buffers if the framesize has changed */ - if (bitsperframe != vc->width * vc->height * vc->format->depth) { + if (dev->dma_ops->alloc && + bitsperframe != vc->width * vc->height * vc->format->depth) { for (pb = 0; pb < 2; pb++) - tw686x_free_dma(vc, pb); + 
dev->dma_ops->free(vc, pb); for (pb = 0; pb < 2; pb++) { - err = tw686x_alloc_dma(vc, pb); + err = dev->dma_ops->alloc(vc, pb); if (err) { if (pb > 0) - tw686x_free_dma(vc, 0); + dev->dma_ops->free(vc, 0); return err; } } @@ -464,6 +721,19 @@ static int tw686x_s_fmt_vid_cap(struct file *file, void *priv, else val &= ~BIT(24); + val &= ~0x7ffff; + + /* Program the DMA scatter-gather */ + if (dev->dma_mode == TW686X_DMA_MODE_SG) { + u32 start_idx, end_idx; + + start_idx = is_second_gen(dev) ? + 0 : vc->ch * TW686X_MAX_SG_DESC_COUNT; + end_idx = start_idx + TW686X_MAX_SG_DESC_COUNT - 1; + + val |= (end_idx << 10) | start_idx; + } + val &= ~(0x7 << 20); val |= vc->format->mode << 20; reg_write(vc->dev, VDMA_CHANNEL_CONFIG[vc->ch], val); @@ -540,6 +810,12 @@ static int tw686x_s_std(struct file *file, void *priv, v4l2_std_id id) ret = tw686x_g_fmt_vid_cap(file, priv, &f); if (!ret) tw686x_s_fmt_vid_cap(file, priv, &f); + + /* + * Frame decimation depends on the chosen standard, + * so reset it to the current value. + */ + tw686x_set_framerate(vc, vc->fps); return 0; } @@ -609,6 +885,40 @@ static int tw686x_g_std(struct file *file, void *priv, v4l2_std_id *id) return 0; } +static int tw686x_g_parm(struct file *file, void *priv, + struct v4l2_streamparm *sp) +{ + struct tw686x_video_channel *vc = video_drvdata(file); + struct v4l2_captureparm *cp = &sp->parm.capture; + + if (sp->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + sp->parm.capture.readbuffers = 3; + + cp->capability = V4L2_CAP_TIMEPERFRAME; + cp->timeperframe.numerator = 1; + cp->timeperframe.denominator = vc->fps; + return 0; +} + +static int tw686x_s_parm(struct file *file, void *priv, + struct v4l2_streamparm *sp) +{ + struct tw686x_video_channel *vc = video_drvdata(file); + struct v4l2_captureparm *cp = &sp->parm.capture; + unsigned int denominator = cp->timeperframe.denominator; + unsigned int numerator = cp->timeperframe.numerator; + unsigned int fps; + + if (vb2_is_busy(&vc->vidq)) + return -EBUSY; + + fps = (!numerator || !denominator) ? 
0 : denominator / numerator; + if (vc->fps != fps) + tw686x_set_framerate(vc, fps); + return tw686x_g_parm(file, priv, sp); +} + static int tw686x_enum_fmt_vid_cap(struct file *file, void *priv, struct v4l2_fmtdesc *f) { @@ -695,6 +1005,9 @@ static const struct v4l2_ioctl_ops tw686x_video_ioctl_ops = { .vidioc_g_std = tw686x_g_std, .vidioc_s_std = tw686x_s_std, + .vidioc_g_parm = tw686x_g_parm, + .vidioc_s_parm = tw686x_s_parm, + .vidioc_enum_input = tw686x_enum_input, .vidioc_g_input = tw686x_g_input, .vidioc_s_input = tw686x_s_input, @@ -713,26 +1026,11 @@ static const struct v4l2_ioctl_ops tw686x_video_ioctl_ops = { .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; -static void tw686x_buffer_copy(struct tw686x_video_channel *vc, - unsigned int pb, struct vb2_v4l2_buffer *vb) -{ - struct tw686x_dma_desc *desc = &vc->dma_descs[pb]; - struct vb2_buffer *vb2_buf = &vb->vb2_buf; - - vb->field = V4L2_FIELD_INTERLACED; - vb->sequence = vc->sequence++; - - memcpy(vb2_plane_vaddr(vb2_buf, 0), desc->virt, desc->size); - vb2_buf->timestamp = ktime_get_ns(); - vb2_buffer_done(vb2_buf, VB2_BUF_STATE_DONE); -} - void tw686x_video_irq(struct tw686x_dev *dev, unsigned long requests, unsigned int pb_status, unsigned int fifo_status, unsigned int *reset_ch) { struct tw686x_video_channel *vc; - struct vb2_v4l2_buffer *vb; unsigned long flags; unsigned int ch, pb; @@ -781,14 +1079,9 @@ void tw686x_video_irq(struct tw686x_dev *dev, unsigned long requests, continue; } - /* handle video stream */ spin_lock_irqsave(&vc->qlock, flags); - if (vc->curr_bufs[pb]) { - vb = &vc->curr_bufs[pb]->vb; - tw686x_buffer_copy(vc, pb, vb); - } - vc->pb = !pb; - tw686x_buffer_refill(vc, pb); + tw686x_buf_done(vc, pb); + dev->dma_ops->buf_refill(vc, pb); spin_unlock_irqrestore(&vc->qlock, flags); } } @@ -803,8 +1096,9 @@ void tw686x_video_free(struct tw686x_dev *dev) if (vc->device) video_unregister_device(vc->device); - for (pb = 0; pb < 2; pb++) - tw686x_free_dma(vc, pb); + if (dev->dma_ops->free) + for (pb = 0; pb < 2; pb++) + dev->dma_ops->free(vc, pb); } } @@ -813,10 +1107,25 @@ int tw686x_video_init(struct tw686x_dev *dev) unsigned int ch, val, pb; int err; + if (dev->dma_mode == TW686X_DMA_MODE_MEMCPY) + dev->dma_ops = &memcpy_dma_ops; + else if (dev->dma_mode == TW686X_DMA_MODE_CONTIG) + dev->dma_ops = &contig_dma_ops; + else if (dev->dma_mode == TW686X_DMA_MODE_SG) + dev->dma_ops = &sg_dma_ops; + else + return -EINVAL; + err = v4l2_device_register(&dev->pci_dev->dev, &dev->v4l2_dev); if (err) return err; + if (dev->dma_ops->setup) { + err = dev->dma_ops->setup(dev); + if (err) + return err; + } + for (ch = 0; ch < max_channels(dev); ch++) { struct tw686x_video_channel *vc = &dev->video_channels[ch]; struct video_device *vdev; @@ -842,10 +1151,12 @@ int tw686x_video_init(struct tw686x_dev *dev) reg_write(dev, HACTIVE_LO[ch], 0xd0); reg_write(dev, VIDEO_SIZE[ch], 0); - for (pb = 0; pb < 2; pb++) { - err = tw686x_alloc_dma(vc, pb); - if (err) - goto error; + if (dev->dma_ops->alloc) { + for (pb = 0; pb < 2; pb++) { + err = dev->dma_ops->alloc(vc, pb); + if (err) + goto error; + } } vc->vidq.io_modes = VB2_READ | VB2_MMAP | VB2_DMABUF; @@ -853,11 +1164,12 @@ int tw686x_video_init(struct tw686x_dev *dev) vc->vidq.drv_priv = vc; vc->vidq.buf_struct_size = sizeof(struct tw686x_v4l2_buf); vc->vidq.ops = &tw686x_video_qops; - vc->vidq.mem_ops = &vb2_vmalloc_memops; + vc->vidq.mem_ops = dev->dma_ops->mem_ops; vc->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; vc->vidq.min_buffers_needed = 2; vc->vidq.lock = 
&vc->vb_mutex; vc->vidq.gfp_flags = GFP_DMA32; + vc->vidq.dev = &dev->pci_dev->dev; err = vb2_queue_init(&vc->vidq); if (err) { @@ -915,10 +1227,9 @@ int tw686x_video_init(struct tw686x_dev *dev) vc->num = vdev->num; } - /* Set DMA frame mode on all channels. Only supported mode for now. */ val = TW686X_DEF_PHASE_REF; for (ch = 0; ch < max_channels(dev); ch++) - val |= TW686X_FRAME_MODE << (16 + ch * 2); + val |= dev->dma_ops->hw_dma_mode << (16 + ch * 2); reg_write(dev, PHASE_REF, val); reg_write(dev, MISC2[0], 0xe7); diff --git a/drivers/media/pci/tw686x/tw686x.h b/drivers/media/pci/tw686x/tw686x.h index 44b5755acf02..f24a2a9bcdb2 100644 --- a/drivers/media/pci/tw686x/tw686x.h +++ b/drivers/media/pci/tw686x/tw686x.h @@ -27,16 +27,14 @@ #define TYPE_SECOND_GEN 0x10 #define TW686X_DEF_PHASE_REF 0x1518 -#define TW686X_FIELD_MODE 0x3 -#define TW686X_FRAME_MODE 0x2 -/* 0x1 is reserved */ -#define TW686X_SG_MODE 0x0 - -#define TW686X_AUDIO_PAGE_SZ 4096 #define TW686X_AUDIO_PAGE_MAX 16 #define TW686X_AUDIO_PERIODS_MIN 2 #define TW686X_AUDIO_PERIODS_MAX TW686X_AUDIO_PAGE_MAX +#define TW686X_DMA_MODE_MEMCPY 0 +#define TW686X_DMA_MODE_CONTIG 1 +#define TW686X_DMA_MODE_SG 2 + struct tw686x_format { char *name; unsigned int fourcc; @@ -50,6 +48,12 @@ struct tw686x_dma_desc { unsigned int size; }; +struct tw686x_sg_desc { + /* 3 MSBits for flags, 13 LSBits for length */ + __le32 flags_length; + __le32 phys; +}; + struct tw686x_audio_buf { dma_addr_t dma; void *virt; @@ -82,6 +86,7 @@ struct tw686x_video_channel { struct video_device *device; struct tw686x_v4l2_buf *curr_bufs[2]; struct tw686x_dma_desc dma_descs[2]; + struct tw686x_sg_desc *sg_descs[2]; struct v4l2_ctrl_handler ctrl_handler; const struct tw686x_format *format; @@ -99,6 +104,16 @@ struct tw686x_video_channel { bool no_signal; }; +struct tw686x_dma_ops { + int (*setup)(struct tw686x_dev *dev); + int (*alloc)(struct tw686x_video_channel *vc, unsigned int pb); + void (*free)(struct tw686x_video_channel *vc, unsigned int pb); + void (*buf_refill)(struct tw686x_video_channel *vc, unsigned int pb); + const struct vb2_mem_ops *mem_ops; + enum v4l2_field field; + u32 hw_dma_mode; +}; + /** * struct tw686x_dev - global device status * @lock: spinlock controlling access to the @@ -112,15 +127,18 @@ struct tw686x_dev { char name[32]; unsigned int type; + unsigned int dma_mode; struct pci_dev *pci_dev; __u32 __iomem *mmio; - void *alloc_ctx; - + const struct tw686x_dma_ops *dma_ops; struct tw686x_video_channel *video_channels; struct tw686x_audio_channel *audio_channels; - int audio_rate; /* per-device value */ + /* Per-device audio parameters */ + int audio_rate; + int period_size; + int audio_enabled; struct timer_list dma_delay_timer; u32 pending_dma_en; /* must be protected by lock */ @@ -143,6 +161,12 @@ static inline unsigned int max_channels(struct tw686x_dev *dev) return dev->type & TYPE_MAX_CHANNELS; /* 4 or 8 channels */ } +static inline unsigned is_second_gen(struct tw686x_dev *dev) +{ + /* each channel has its own DMA SG table */ + return dev->type & TYPE_SECOND_GEN; +} + void tw686x_enable_channel(struct tw686x_dev *dev, unsigned int channel); void tw686x_disable_channel(struct tw686x_dev *dev, unsigned int channel); diff --git a/drivers/media/pci/zoran/zr36016.c b/drivers/media/pci/zoran/zr36016.c index b87ddba8608f..c12ca9f96bac 100644 --- a/drivers/media/pci/zoran/zr36016.c +++ b/drivers/media/pci/zoran/zr36016.c @@ -246,10 +246,6 @@ static int zr36016_pushit (struct zr36016 *ptr, //TODO// 
 ========================================================================= */
 
-// needed offset values PAL NTSC SECAM
-static const int zr016_xoff[] = { 20, 20, 20 };
-static const int zr016_yoff[] = { 8, 9, 7 };
-
 static void zr36016_init (struct zr36016 *ptr)
 {
diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index 84e041c0a70e..f25344bc7912 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -110,6 +110,7 @@ source "drivers/media/platform/exynos4-is/Kconfig"
 source "drivers/media/platform/s5p-tv/Kconfig"
 source "drivers/media/platform/am437x/Kconfig"
 source "drivers/media/platform/xilinx/Kconfig"
+source "drivers/media/platform/rcar-vin/Kconfig"
 
 config VIDEO_TI_CAL
 	tristate "TI CAL (Camera Adaptation Layer) driver"
@@ -152,6 +153,36 @@ config VIDEO_CODA
 	   Coda is a range of video codec IPs that support
 	   H.264, MPEG-4, and other video formats.
 
+config VIDEO_MEDIATEK_VPU
+	tristate "Mediatek Video Processor Unit"
+	depends on VIDEO_DEV && VIDEO_V4L2
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	---help---
+	    This driver provides support for downloading the VPU firmware
+	    and communicating with the VPU. It drives the hardware video
+	    codec embedded in Mediatek's MT8173 SoCs, which is able to
+	    handle video decoding/encoding in a range of formats.
+
+	    To compile this driver as a module, choose M here: the
+	    module will be called mtk-vpu.
+
+config VIDEO_MEDIATEK_VCODEC
+	tristate "Mediatek Video Codec driver"
+	depends on MTK_IOMMU || COMPILE_TEST
+	depends on VIDEO_DEV && VIDEO_V4L2
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	select VIDEOBUF2_DMA_CONTIG
+	select V4L2_MEM2MEM_DEV
+	select VIDEO_MEDIATEK_VPU
+	default n
+	---help---
+	    The Mediatek video codec driver provides hardware-accelerated
+	    encoding and decoding in a range of video formats.
+	    This driver relies on the VPU driver to communicate with the VPU.
+
+	    To compile this driver as a module, choose M here: the
+	    module will be called mtk-vcodec.
+
 config VIDEO_MEM2MEM_DEINTERLACE
 	tristate "Deinterlace support"
 	depends on VIDEO_DEV && VIDEO_V4L2 && DMA_ENGINE
@@ -247,10 +278,24 @@ config VIDEO_RENESAS_JPU
 	    To compile this driver as a module, choose M here: the
 	    module will be called rcar_jpu.
 
+config VIDEO_RENESAS_FCP
+	tristate "Renesas Frame Compression Processor"
+	depends on ARCH_RENESAS || COMPILE_TEST
+	depends on OF
+	---help---
+	    This is a driver for the Renesas Frame Compression Processor (FCP).
+	    The FCP is a companion module of video processing modules in the
+	    Renesas R-Car Gen3 SoCs. It handles memory access for the codec,
+	    VSP and FDP modules.
+
+	    To compile this driver as a module, choose M here: the module
+	    will be called rcar-fcp.
+
 config VIDEO_RENESAS_VSP1
 	tristate "Renesas VSP1 Video Processing Engine"
 	depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API && HAS_DMA
 	depends on (ARCH_RENESAS && OF) || COMPILE_TEST
+	depends on (!ARM64 && !VIDEO_RENESAS_FCP) || VIDEO_RENESAS_FCP
 	select VIDEOBUF2_DMA_CONTIG
 	---help---
 	    This is a V4L2 driver for the Renesas VSP1 video processing engine.
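[Editor's note: the new VSP1 dependency line above is a standard Kconfig
idiom despite its odd look. Evaluated over tristates,
"(!ARM64 && !VIDEO_RENESAS_FCP) || VIDEO_RENESAS_FCP" means: on non-ARM64
systems VSP1 may be enabled with the FCP driver completely disabled; once
VIDEO_RENESAS_FCP=m, VSP1 is capped at m as well, so a built-in VSP1 can
never call into a modular rcar-fcp; and on ARM64, where the R-Car Gen3
SoCs that require the FCP live, the left-hand branch is false, making
VIDEO_RENESAS_FCP mandatory.]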
diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile index bbb7bd1eb268..21771c1a13fb 100644 --- a/drivers/media/platform/Makefile +++ b/drivers/media/platform/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_VIDEO_SH_VOU) += sh_vou.o obj-$(CONFIG_SOC_CAMERA) += soc_camera/ +obj-$(CONFIG_VIDEO_RENESAS_FCP) += rcar-fcp.o obj-$(CONFIG_VIDEO_RENESAS_JPU) += rcar_jpu.o obj-$(CONFIG_VIDEO_RENESAS_VSP1) += vsp1/ @@ -55,4 +56,10 @@ obj-$(CONFIG_VIDEO_AM437X_VPFE) += am437x/ obj-$(CONFIG_VIDEO_XILINX) += xilinx/ +obj-$(CONFIG_VIDEO_RCAR_VIN) += rcar-vin/ + ccflags-y += -I$(srctree)/drivers/media/i2c + +obj-$(CONFIG_VIDEO_MEDIATEK_VPU) += mtk-vpu/ + +obj-$(CONFIG_VIDEO_MEDIATEK_VCODEC) += mtk-vcodec/ diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c index e749eb7c3be9..b33b9e35e60e 100644 --- a/drivers/media/platform/am437x/am437x-vpfe.c +++ b/drivers/media/platform/am437x/am437x-vpfe.c @@ -1901,21 +1901,20 @@ static void vpfe_calculate_offsets(struct vpfe_device *vpfe) * @nbuffers: ptr to number of buffers requested by application * @nplanes:: contains number of distinct video planes needed to hold a frame * @sizes[]: contains the size (in bytes) of each plane. - * @alloc_ctxs: ptr to allocation context + * @alloc_devs: ptr to allocation context * * This callback function is called when reqbuf() is called to adjust * the buffer count and buffer size */ static int vpfe_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct vpfe_device *vpfe = vb2_get_drv_priv(vq); unsigned size = vpfe->fmt.fmt.pix.sizeimage; if (vq->num_buffers + *nbuffers < 3) *nbuffers = 3 - vq->num_buffers; - alloc_ctxs[0] = vpfe->alloc_ctx; if (*nplanes) { if (sizes[0] < size) @@ -2364,13 +2363,6 @@ static int vpfe_probe_complete(struct vpfe_device *vpfe) goto probe_out; /* Initialize videobuf2 queue as per the buffer type */ - vpfe->alloc_ctx = vb2_dma_contig_init_ctx(vpfe->pdev); - if (IS_ERR(vpfe->alloc_ctx)) { - vpfe_err(vpfe, "Failed to get the context\n"); - err = PTR_ERR(vpfe->alloc_ctx); - goto probe_out; - } - q = &vpfe->buffer_queue; q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; q->io_modes = VB2_MMAP | VB2_DMABUF | VB2_READ; @@ -2381,11 +2373,11 @@ static int vpfe_probe_complete(struct vpfe_device *vpfe) q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &vpfe->lock; q->min_buffers_needed = 1; + q->dev = vpfe->pdev; err = vb2_queue_init(q); if (err) { vpfe_err(vpfe, "vb2_queue_init() failed\n"); - vb2_dma_contig_cleanup_ctx(vpfe->alloc_ctx); goto probe_out; } diff --git a/drivers/media/platform/am437x/am437x-vpfe.h b/drivers/media/platform/am437x/am437x-vpfe.h index 777bf97fea57..17d7aa426788 100644 --- a/drivers/media/platform/am437x/am437x-vpfe.h +++ b/drivers/media/platform/am437x/am437x-vpfe.h @@ -264,8 +264,6 @@ struct vpfe_device { struct v4l2_rect crop; /* Buffer queue used in video-buf */ struct vb2_queue buffer_queue; - /* Allocator-specific contexts for each plane */ - struct vb2_alloc_ctx *alloc_ctx; /* Queue of filled frames */ struct list_head dma_queue; /* IRQ lock for DMA queue */ diff --git a/drivers/media/platform/blackfin/bfin_capture.c b/drivers/media/platform/blackfin/bfin_capture.c index d0092dae7a57..8eb03397d736 100644 --- a/drivers/media/platform/blackfin/bfin_capture.c +++ b/drivers/media/platform/blackfin/bfin_capture.c @@ -91,8 +91,6 @@ struct bcap_device { struct bcap_buffer 
*cur_frm; /* buffer queue used in videobuf2 */ struct vb2_queue buffer_queue; - /* allocator-specific contexts for each plane */ - struct vb2_alloc_ctx *alloc_ctx; /* queue of filled frames */ struct list_head dma_queue; /* used in videobuf2 callback */ @@ -203,13 +201,12 @@ static void bcap_free_sensor_formats(struct bcap_device *bcap_dev) static int bcap_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct bcap_device *bcap_dev = vb2_get_drv_priv(vq); if (vq->num_buffers + *nbuffers < 2) *nbuffers = 2; - alloc_ctxs[0] = bcap_dev->alloc_ctx; if (*nplanes) return sizes[0] < bcap_dev->fmt.sizeimage ? -EINVAL : 0; @@ -820,12 +817,6 @@ static int bcap_probe(struct platform_device *pdev) } bcap_dev->ppi->priv = bcap_dev; - bcap_dev->alloc_ctx = vb2_dma_contig_init_ctx(&pdev->dev); - if (IS_ERR(bcap_dev->alloc_ctx)) { - ret = PTR_ERR(bcap_dev->alloc_ctx); - goto err_free_ppi; - } - vfd = &bcap_dev->video_dev; /* initialize field of video device */ vfd->release = video_device_release_empty; @@ -839,7 +830,7 @@ static int bcap_probe(struct platform_device *pdev) if (ret) { v4l2_err(pdev->dev.driver, "Unable to register v4l2 device\n"); - goto err_cleanup_ctx; + goto err_free_ppi; } v4l2_info(&bcap_dev->v4l2_dev, "v4l2 device registered\n"); @@ -863,6 +854,7 @@ static int bcap_probe(struct platform_device *pdev) q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &bcap_dev->mutex; q->min_buffers_needed = 1; + q->dev = &pdev->dev; ret = vb2_queue_init(q); if (ret) @@ -967,8 +959,6 @@ err_free_handler: v4l2_ctrl_handler_free(&bcap_dev->ctrl_handler); err_unreg_v4l2: v4l2_device_unregister(&bcap_dev->v4l2_dev); -err_cleanup_ctx: - vb2_dma_contig_cleanup_ctx(bcap_dev->alloc_ctx); err_free_ppi: ppi_delete_instance(bcap_dev->ppi); err_free_dev: @@ -986,7 +976,6 @@ static int bcap_remove(struct platform_device *pdev) video_unregister_device(&bcap_dev->video_dev); v4l2_ctrl_handler_free(&bcap_dev->ctrl_handler); v4l2_device_unregister(v4l2_dev); - vb2_dma_contig_cleanup_ctx(bcap_dev->alloc_ctx); ppi_delete_instance(bcap_dev->ppi); kfree(bcap_dev); return 0; diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c index 133ab9f70f85..c39718a63e5e 100644 --- a/drivers/media/platform/coda/coda-common.c +++ b/drivers/media/platform/coda/coda-common.c @@ -1139,7 +1139,7 @@ static void set_default_params(struct coda_ctx *ctx) */ static int coda_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct coda_ctx *ctx = vb2_get_drv_priv(vq); struct coda_q_data *q_data; @@ -1151,9 +1151,6 @@ static int coda_queue_setup(struct vb2_queue *vq, *nplanes = 1; sizes[0] = size; - /* Set to vb2-dma-contig allocator context, ignored by vb2-vmalloc */ - alloc_ctxs[0] = ctx->dev->alloc_ctx; - v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "get %d buffer(s) of size %d each.\n", *nbuffers, size); @@ -1599,6 +1596,7 @@ static int coda_queue_init(struct coda_ctx *ctx, struct vb2_queue *vq) * that videobuf2 will keep the value of bytesused intact. 
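 * (Editor's note: the vq->dev assignment added below is the other half
 * of this patch's alloc_ctx removal: instead of a context object from
 * vb2_dma_contig_init_ctx() carrying the allocating struct device, the
 * vb2 queue itself now names that device.)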
*/ vq->allow_zero_bytesused = 1; + vq->dev = &ctx->dev->plat_dev->dev; return vb2_queue_init(vq); } @@ -2040,16 +2038,10 @@ static void coda_fw_callback(const struct firmware *fw, void *context) if (ret < 0) goto put_pm; - dev->alloc_ctx = vb2_dma_contig_init_ctx(&pdev->dev); - if (IS_ERR(dev->alloc_ctx)) { - v4l2_err(&dev->v4l2_dev, "Failed to alloc vb2 context\n"); - goto put_pm; - } - dev->m2m_dev = v4l2_m2m_init(&coda_m2m_ops); if (IS_ERR(dev->m2m_dev)) { v4l2_err(&dev->v4l2_dev, "Failed to init mem2mem device\n"); - goto rel_ctx; + goto put_pm; } for (i = 0; i < dev->devtype->num_vdevs; i++) { @@ -2072,8 +2064,6 @@ rel_vfd: while (--i >= 0) video_unregister_device(&dev->vfd[i]); v4l2_m2m_release(dev->m2m_dev); -rel_ctx: - vb2_dma_contig_cleanup_ctx(dev->alloc_ctx); put_pm: pm_runtime_put_sync(&pdev->dev); } @@ -2226,7 +2216,7 @@ static int coda_probe(struct platform_device *pdev) dev->rstc = devm_reset_control_get_optional(&pdev->dev, NULL); if (IS_ERR(dev->rstc)) { ret = PTR_ERR(dev->rstc); - if (ret == -ENOENT || ret == -ENOSYS) { + if (ret == -ENOENT || ret == -ENOTSUPP) { dev->rstc = NULL; } else { dev_err(&pdev->dev, "failed get reset control: %d\n", @@ -2324,8 +2314,6 @@ static int coda_remove(struct platform_device *pdev) if (dev->m2m_dev) v4l2_m2m_release(dev->m2m_dev); pm_runtime_disable(&pdev->dev); - if (dev->alloc_ctx) - vb2_dma_contig_cleanup_ctx(dev->alloc_ctx); v4l2_device_unregister(&dev->v4l2_dev); destroy_workqueue(dev->workqueue); if (dev->iram.vaddr) diff --git a/drivers/media/platform/coda/coda.h b/drivers/media/platform/coda/coda.h index 8f2c71e06966..53f96661683c 100644 --- a/drivers/media/platform/coda/coda.h +++ b/drivers/media/platform/coda/coda.h @@ -92,7 +92,6 @@ struct coda_dev { struct mutex coda_mutex; struct workqueue_struct *workqueue; struct v4l2_m2m_dev *m2m_dev; - struct vb2_alloc_ctx *alloc_ctx; struct list_head instances; unsigned long instance_mask; struct dentry *debugfs_root; diff --git a/drivers/media/platform/davinci/ccdc_hw_device.h b/drivers/media/platform/davinci/ccdc_hw_device.h index 86b9b3518965..ae5605de7679 100644 --- a/drivers/media/platform/davinci/ccdc_hw_device.h +++ b/drivers/media/platform/davinci/ccdc_hw_device.h @@ -80,13 +80,6 @@ struct ccdc_hw_ops { /* Pointer to function to get line length */ unsigned int (*get_line_length) (void); - /* Query CCDC control IDs */ - int (*queryctrl)(struct v4l2_queryctrl *qctrl); - /* Set CCDC control */ - int (*set_control)(struct v4l2_control *ctrl); - /* Get CCDC control */ - int (*get_control)(struct v4l2_control *ctrl); - /* Pointer to function to set frame buffer address */ void (*setfbaddr) (unsigned long addr); /* Pointer to function to get field id */ diff --git a/drivers/media/platform/davinci/vpbe_display.c b/drivers/media/platform/davinci/vpbe_display.c index 0abcdfe97a6c..0b1709e96673 100644 --- a/drivers/media/platform/davinci/vpbe_display.c +++ b/drivers/media/platform/davinci/vpbe_display.c @@ -230,7 +230,7 @@ static int vpbe_buffer_prepare(struct vb2_buffer *vb) static int vpbe_buffer_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { /* Get the file handle object and layer object */ @@ -242,7 +242,6 @@ vpbe_buffer_queue_setup(struct vb2_queue *vq, /* Store number of buffers allocated in numbuffer member */ if (vq->num_buffers + *nbuffers < VPBE_DEFAULT_NUM_BUFS) *nbuffers = VPBE_DEFAULT_NUM_BUFS - vq->num_buffers; - alloc_ctxs[0] = 
layer->alloc_ctx; if (*nplanes) return sizes[0] < layer->pix_fmt.sizeimage ? -EINVAL : 0; @@ -1451,20 +1450,13 @@ static int vpbe_display_probe(struct platform_device *pdev) q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->min_buffers_needed = 1; q->lock = &disp_dev->dev[i]->opslock; + q->dev = disp_dev->vpbe_dev->pdev; err = vb2_queue_init(q); if (err) { v4l2_err(v4l2_dev, "vb2_queue_init() failed\n"); goto probe_out; } - disp_dev->dev[i]->alloc_ctx = - vb2_dma_contig_init_ctx(disp_dev->vpbe_dev->pdev); - if (IS_ERR(disp_dev->dev[i]->alloc_ctx)) { - v4l2_err(v4l2_dev, "Failed to get the context\n"); - err = PTR_ERR(disp_dev->dev[i]->alloc_ctx); - goto probe_out; - } - INIT_LIST_HEAD(&disp_dev->dev[i]->dma_queue); if (register_device(disp_dev->dev[i], disp_dev, pdev)) { @@ -1482,7 +1474,6 @@ probe_out: for (k = 0; k < VPBE_DISPLAY_MAX_DEVICES; k++) { /* Unregister video device */ if (disp_dev->dev[k] != NULL) { - vb2_dma_contig_cleanup_ctx(disp_dev->dev[k]->alloc_ctx); video_unregister_device(&disp_dev->dev[k]->video_dev); kfree(disp_dev->dev[k]); } @@ -1510,7 +1501,6 @@ static int vpbe_display_remove(struct platform_device *pdev) for (i = 0; i < VPBE_DISPLAY_MAX_DEVICES; i++) { /* Get the pointer to the layer object */ vpbe_display_layer = disp_dev->dev[i]; - vb2_dma_contig_cleanup_ctx(vpbe_display_layer->alloc_ctx); /* Unregister video device */ video_unregister_device(&vpbe_display_layer->video_dev); diff --git a/drivers/media/platform/davinci/vpif_capture.c b/drivers/media/platform/davinci/vpif_capture.c index 08f7028c7560..5104cc0ee40e 100644 --- a/drivers/media/platform/davinci/vpif_capture.c +++ b/drivers/media/platform/davinci/vpif_capture.c @@ -107,14 +107,14 @@ static int vpif_buffer_prepare(struct vb2_buffer *vb) * @nbuffers: ptr to number of buffers requested by application * @nplanes:: contains number of distinct video planes needed to hold a frame * @sizes[]: contains the size (in bytes) of each plane. 
- * @alloc_ctxs: ptr to allocation context + * @alloc_devs: ptr to allocation context * * This callback function is called when reqbuf() is called to adjust * the buffer count and buffer size */ static int vpif_buffer_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct channel_obj *ch = vb2_get_drv_priv(vq); struct common_obj *common = &ch->common[VPIF_VIDEO_INDEX]; @@ -133,7 +133,6 @@ static int vpif_buffer_queue_setup(struct vb2_queue *vq, *nplanes = 1; sizes[0] = size; - alloc_ctxs[0] = common->alloc_ctx; /* Calculate the offset for Y and C data in the buffer */ vpif_calculate_offsets(ch); @@ -1371,6 +1370,7 @@ static int vpif_probe_complete(void) q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->min_buffers_needed = 1; q->lock = &common->lock; + q->dev = vpif_dev; err = vb2_queue_init(q); if (err) { @@ -1378,13 +1378,6 @@ static int vpif_probe_complete(void) goto probe_out; } - common->alloc_ctx = vb2_dma_contig_init_ctx(vpif_dev); - if (IS_ERR(common->alloc_ctx)) { - vpif_err("Failed to get the context\n"); - err = PTR_ERR(common->alloc_ctx); - goto probe_out; - } - INIT_LIST_HEAD(&common->dma_queue); /* Initialize the video_device structure */ @@ -1412,7 +1405,6 @@ probe_out: /* Get the pointer to the channel object */ ch = vpif_obj.dev[k]; common = &ch->common[k]; - vb2_dma_contig_cleanup_ctx(common->alloc_ctx); /* Unregister video device */ video_unregister_device(&ch->video_dev); } @@ -1546,7 +1538,6 @@ static int vpif_remove(struct platform_device *device) /* Get the pointer to the channel object */ ch = vpif_obj.dev[i]; common = &ch->common[VPIF_VIDEO_INDEX]; - vb2_dma_contig_cleanup_ctx(common->alloc_ctx); /* Unregister video device */ video_unregister_device(&ch->video_dev); kfree(vpif_obj.dev[i]); diff --git a/drivers/media/platform/davinci/vpif_capture.h b/drivers/media/platform/davinci/vpif_capture.h index 4a7600929b61..9e35b6771d22 100644 --- a/drivers/media/platform/davinci/vpif_capture.h +++ b/drivers/media/platform/davinci/vpif_capture.h @@ -65,8 +65,6 @@ struct common_obj { struct v4l2_format fmt; /* Buffer queue used in video-buf */ struct vb2_queue buffer_queue; - /* allocator-specific contexts for each plane */ - struct vb2_alloc_ctx *alloc_ctx; /* Queue of filled frames */ struct list_head dma_queue; /* Used in video-buf */ diff --git a/drivers/media/platform/davinci/vpif_display.c b/drivers/media/platform/davinci/vpif_display.c index f40755cf1bf2..75b27233ec2f 100644 --- a/drivers/media/platform/davinci/vpif_display.c +++ b/drivers/media/platform/davinci/vpif_display.c @@ -102,14 +102,14 @@ static int vpif_buffer_prepare(struct vb2_buffer *vb) * @nbuffers: ptr to number of buffers requested by application * @nplanes:: contains number of distinct video planes needed to hold a frame * @sizes[]: contains the size (in bytes) of each plane. 
- * @alloc_ctxs: ptr to allocation context + * @alloc_devs: ptr to allocation context * * This callback function is called when reqbuf() is called to adjust * the buffer count and buffer size */ static int vpif_buffer_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct channel_obj *ch = vb2_get_drv_priv(vq); struct common_obj *common = &ch->common[VPIF_VIDEO_INDEX]; @@ -126,7 +126,6 @@ static int vpif_buffer_queue_setup(struct vb2_queue *vq, *nplanes = 1; sizes[0] = size; - alloc_ctxs[0] = common->alloc_ctx; /* Calculate the offset for Y and C data in the buffer */ vpif_calculate_offsets(ch); @@ -1191,19 +1190,13 @@ static int vpif_probe_complete(void) q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->min_buffers_needed = 1; q->lock = &common->lock; + q->dev = vpif_dev; err = vb2_queue_init(q); if (err) { vpif_err("vpif_display: vb2_queue_init() failed\n"); goto probe_out; } - common->alloc_ctx = vb2_dma_contig_init_ctx(vpif_dev); - if (IS_ERR(common->alloc_ctx)) { - vpif_err("Failed to get the context\n"); - err = PTR_ERR(common->alloc_ctx); - goto probe_out; - } - INIT_LIST_HEAD(&common->dma_queue); /* register video device */ @@ -1233,7 +1226,6 @@ probe_out: for (k = 0; k < j; k++) { ch = vpif_obj.dev[k]; common = &ch->common[k]; - vb2_dma_contig_cleanup_ctx(common->alloc_ctx); video_unregister_device(&ch->video_dev); } return err; @@ -1355,7 +1347,6 @@ static int vpif_remove(struct platform_device *device) /* Get the pointer to the channel object */ ch = vpif_obj.dev[i]; common = &ch->common[VPIF_VIDEO_INDEX]; - vb2_dma_contig_cleanup_ctx(common->alloc_ctx); /* Unregister video device */ video_unregister_device(&ch->video_dev); kfree(vpif_obj.dev[i]); diff --git a/drivers/media/platform/davinci/vpif_display.h b/drivers/media/platform/davinci/vpif_display.h index e7a1723a1b7a..af2765fdcea8 100644 --- a/drivers/media/platform/davinci/vpif_display.h +++ b/drivers/media/platform/davinci/vpif_display.h @@ -74,8 +74,6 @@ struct common_obj { struct v4l2_format fmt; /* Used to store the format */ struct vb2_queue buffer_queue; /* Buffer queue used in * video-buf */ - /* allocator-specific contexts for each plane */ - struct vb2_alloc_ctx *alloc_ctx; struct list_head dma_queue; /* Queue of filled frames */ spinlock_t irqlock; /* Used in video-buf */ diff --git a/drivers/media/platform/exynos-gsc/gsc-core.c b/drivers/media/platform/exynos-gsc/gsc-core.c index c04973669a47..787bd16c19e5 100644 --- a/drivers/media/platform/exynos-gsc/gsc-core.c +++ b/drivers/media/platform/exynos-gsc/gsc-core.c @@ -1123,19 +1123,13 @@ static int gsc_probe(struct platform_device *pdev) if (ret < 0) goto err_m2m; - /* Initialize continious memory allocator */ - gsc->alloc_ctx = vb2_dma_contig_init_ctx(dev); - if (IS_ERR(gsc->alloc_ctx)) { - ret = PTR_ERR(gsc->alloc_ctx); - goto err_pm; - } + vb2_dma_contig_set_max_seg_size(dev, DMA_BIT_MASK(32)); dev_dbg(dev, "gsc-%d registered successfully\n", gsc->id); pm_runtime_put(dev); return 0; -err_pm: - pm_runtime_put(dev); + err_m2m: gsc_unregister_m2m_device(gsc); err_v4l2: @@ -1152,7 +1146,7 @@ static int gsc_remove(struct platform_device *pdev) gsc_unregister_m2m_device(gsc); v4l2_device_unregister(&gsc->v4l2_dev); - vb2_dma_contig_cleanup_ctx(gsc->alloc_ctx); + vb2_dma_contig_clear_max_seg_size(&pdev->dev); pm_runtime_disable(&pdev->dev); gsc_clk_put(gsc); diff --git a/drivers/media/platform/exynos-gsc/gsc-core.h 
b/drivers/media/platform/exynos-gsc/gsc-core.h index ec4000c72172..7ad7b9dc2243 100644 --- a/drivers/media/platform/exynos-gsc/gsc-core.h +++ b/drivers/media/platform/exynos-gsc/gsc-core.h @@ -327,7 +327,6 @@ struct gsc_driverdata { * @irq_queue: interrupt handler waitqueue * @m2m: memory-to-memory V4L2 device information * @state: flags used to synchronize m2m and capture mode operation - * @alloc_ctx: videobuf2 memory allocator context * @vdev: video device for G-Scaler instance */ struct gsc_dev { @@ -341,7 +340,6 @@ struct gsc_dev { wait_queue_head_t irq_queue; struct gsc_m2m_device m2m; unsigned long state; - struct vb2_alloc_ctx *alloc_ctx; struct video_device vdev; struct v4l2_device v4l2_dev; }; diff --git a/drivers/media/platform/exynos-gsc/gsc-m2m.c b/drivers/media/platform/exynos-gsc/gsc-m2m.c index a600e32e2543..ec6494cbdd45 100644 --- a/drivers/media/platform/exynos-gsc/gsc-m2m.c +++ b/drivers/media/platform/exynos-gsc/gsc-m2m.c @@ -213,7 +213,7 @@ put_device: static int gsc_m2m_queue_setup(struct vb2_queue *vq, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *allocators[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct gsc_ctx *ctx = vb2_get_drv_priv(vq); struct gsc_frame *frame; @@ -227,10 +227,8 @@ static int gsc_m2m_queue_setup(struct vb2_queue *vq, return -EINVAL; *num_planes = frame->fmt->num_planes; - for (i = 0; i < frame->fmt->num_planes; i++) { + for (i = 0; i < frame->fmt->num_planes; i++) sizes[i] = frame->payload[i]; - allocators[i] = ctx->gsc_dev->alloc_ctx; - } return 0; } @@ -591,6 +589,7 @@ static int queue_init(void *priv, struct vb2_queue *src_vq, src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer); src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; src_vq->lock = &ctx->gsc_dev->lock; + src_vq->dev = &ctx->gsc_dev->pdev->dev; ret = vb2_queue_init(src_vq); if (ret) @@ -605,6 +604,7 @@ static int queue_init(void *priv, struct vb2_queue *src_vq, dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer); dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; dst_vq->lock = &ctx->gsc_dev->lock; + dst_vq->dev = &ctx->gsc_dev->pdev->dev; return vb2_queue_init(dst_vq); } diff --git a/drivers/media/platform/exynos4-is/fimc-capture.c b/drivers/media/platform/exynos4-is/fimc-capture.c index bf47d3b9cbe7..fdec499fbbda 100644 --- a/drivers/media/platform/exynos4-is/fimc-capture.c +++ b/drivers/media/platform/exynos4-is/fimc-capture.c @@ -340,7 +340,7 @@ int fimc_capture_resume(struct fimc_dev *fimc) static int queue_setup(struct vb2_queue *vq, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *allocators[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct fimc_ctx *ctx = vq->drv_priv; struct fimc_frame *frame = &ctx->d_frame; @@ -354,11 +354,9 @@ static int queue_setup(struct vb2_queue *vq, if (*num_planes) { if (*num_planes != fmt->memplanes) return -EINVAL; - for (i = 0; i < *num_planes; i++) { + for (i = 0; i < *num_planes; i++) if (sizes[i] < (wh * fmt->depth[i]) / 8) return -EINVAL; - allocators[i] = ctx->fimc_dev->alloc_ctx; - } return 0; } @@ -371,8 +369,6 @@ static int queue_setup(struct vb2_queue *vq, sizes[i] = frame->payload[i]; else sizes[i] = max_t(u32, size, frame->payload[i]); - - allocators[i] = ctx->fimc_dev->alloc_ctx; } return 0; @@ -1779,6 +1775,7 @@ static int fimc_register_capture_device(struct fimc_dev *fimc, q->buf_struct_size = sizeof(struct fimc_vid_buffer); q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &fimc->lock; + 
q->dev = &fimc->pdev->dev; ret = vb2_queue_init(q); if (ret) diff --git a/drivers/media/platform/exynos4-is/fimc-core.c b/drivers/media/platform/exynos4-is/fimc-core.c index b1c1cea82a27..8f89ca21b631 100644 --- a/drivers/media/platform/exynos4-is/fimc-core.c +++ b/drivers/media/platform/exynos4-is/fimc-core.c @@ -1018,19 +1018,11 @@ static int fimc_probe(struct platform_device *pdev) goto err_sd; } - /* Initialize contiguous memory allocator */ - fimc->alloc_ctx = vb2_dma_contig_init_ctx(dev); - if (IS_ERR(fimc->alloc_ctx)) { - ret = PTR_ERR(fimc->alloc_ctx); - goto err_gclk; - } + vb2_dma_contig_set_max_seg_size(dev, DMA_BIT_MASK(32)); dev_dbg(dev, "FIMC.%d registered successfully\n", fimc->id); return 0; -err_gclk: - if (!pm_runtime_enabled(dev)) - clk_disable(fimc->clock[CLK_GATE]); err_sd: fimc_unregister_capture_subdev(fimc); err_sclk: @@ -1123,7 +1115,7 @@ static int fimc_remove(struct platform_device *pdev) pm_runtime_set_suspended(&pdev->dev); fimc_unregister_capture_subdev(fimc); - vb2_dma_contig_cleanup_ctx(fimc->alloc_ctx); + vb2_dma_contig_clear_max_seg_size(&pdev->dev); clk_disable(fimc->clock[CLK_BUS]); fimc_clk_put(fimc); diff --git a/drivers/media/platform/exynos4-is/fimc-core.h b/drivers/media/platform/exynos4-is/fimc-core.h index 6b7435453d2a..5615fefbf7af 100644 --- a/drivers/media/platform/exynos4-is/fimc-core.h +++ b/drivers/media/platform/exynos4-is/fimc-core.h @@ -307,7 +307,6 @@ struct fimc_m2m_device { */ struct fimc_vid_cap { struct fimc_ctx *ctx; - struct vb2_alloc_ctx *alloc_ctx; struct v4l2_subdev subdev; struct exynos_video_entity ve; struct media_pad vd_pad; @@ -417,7 +416,6 @@ struct fimc_ctx; * @m2m: memory-to-memory V4L2 device information * @vid_cap: camera capture device information * @state: flags used to synchronize m2m and capture mode operation - * @alloc_ctx: videobuf2 memory allocator context * @pipeline: fimc video capture pipeline data structure */ struct fimc_dev { @@ -436,7 +434,6 @@ struct fimc_dev { struct fimc_m2m_device m2m; struct fimc_vid_cap vid_cap; unsigned long state; - struct vb2_alloc_ctx *alloc_ctx; }; /** diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c index 979c388ebf60..32ca55f16677 100644 --- a/drivers/media/platform/exynos4-is/fimc-is.c +++ b/drivers/media/platform/exynos4-is/fimc-is.c @@ -204,9 +204,6 @@ static int fimc_is_register_subdevs(struct fimc_is *is) if (ret < 0) return ret; - /* Initialize memory allocator context for the ISP DMA. */ - is->isp.alloc_ctx = is->alloc_ctx; - for_each_compatible_node(i2c_bus, NULL, FIMC_IS_I2C_COMPATIBLE) { for_each_available_child_of_node(i2c_bus, child) { ret = fimc_is_parse_sensor_config(is, index, child); @@ -847,18 +844,14 @@ static int fimc_is_probe(struct platform_device *pdev) if (ret < 0) goto err_pm; - is->alloc_ctx = vb2_dma_contig_init_ctx(dev); - if (IS_ERR(is->alloc_ctx)) { - ret = PTR_ERR(is->alloc_ctx); - goto err_pm; - } + vb2_dma_contig_set_max_seg_size(dev, DMA_BIT_MASK(32)); /* * Register FIMC-IS V4L2 subdevs to this driver. The video nodes * will be created within the subdev's registered() callback. 
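 * (Editor's note: as in the other exynos4-is conversions in this
 * series, the per-device vb2_alloc_ctx is gone; each vb2 queue now
 * carries its allocating device in q->dev, and the only per-device DMA
 * setup left is the vb2_dma_contig_set_max_seg_size(dev,
 * DMA_BIT_MASK(32)) call above.)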
*/ ret = fimc_is_register_subdevs(is); if (ret < 0) - goto err_vb; + goto err_pm; ret = fimc_is_debugfs_create(is); if (ret < 0) @@ -877,8 +870,6 @@ err_dfs: fimc_is_debugfs_remove(is); err_sd: fimc_is_unregister_subdevs(is); -err_vb: - vb2_dma_contig_cleanup_ctx(is->alloc_ctx); err_pm: if (!pm_runtime_enabled(dev)) fimc_is_runtime_suspend(dev); @@ -939,7 +930,7 @@ static int fimc_is_remove(struct platform_device *pdev) fimc_is_runtime_suspend(dev); free_irq(is->irq, is); fimc_is_unregister_subdevs(is); - vb2_dma_contig_cleanup_ctx(is->alloc_ctx); + vb2_dma_contig_clear_max_seg_size(dev); fimc_is_put_clocks(is); fimc_is_debugfs_remove(is); release_firmware(is->fw.f_w); diff --git a/drivers/media/platform/exynos4-is/fimc-is.h b/drivers/media/platform/exynos4-is/fimc-is.h index 386eb49ece7e..3a82c6a214c7 100644 --- a/drivers/media/platform/exynos4-is/fimc-is.h +++ b/drivers/media/platform/exynos4-is/fimc-is.h @@ -233,7 +233,6 @@ struct chain_config { * @pdev: pointer to FIMC-IS platform device * @pctrl: pointer to pinctrl structure for this device * @v4l2_dev: pointer to top the level v4l2_device - * @alloc_ctx: videobuf2 memory allocator context * @lock: mutex serializing video device and the subdev operations * @slock: spinlock protecting this data structure and the hw registers * @clocks: FIMC-LITE gate clock @@ -256,7 +255,6 @@ struct fimc_is { struct fimc_is_sensor sensor[FIMC_IS_SENSORS_NUM]; struct fimc_is_setfile setfile; - struct vb2_alloc_ctx *alloc_ctx; struct v4l2_ctrl_handler ctrl_handler; struct mutex lock; diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.c b/drivers/media/platform/exynos4-is/fimc-isp-video.c index c0816728cbfe..400ce0cb0c0d 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp-video.c +++ b/drivers/media/platform/exynos4-is/fimc-isp-video.c @@ -40,7 +40,7 @@ static int isp_video_capture_queue_setup(struct vb2_queue *vq, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *allocators[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct fimc_isp *isp = vb2_get_drv_priv(vq); struct v4l2_pix_format_mplane *vid_fmt = &isp->video_capture.pixfmt; @@ -57,20 +57,16 @@ static int isp_video_capture_queue_setup(struct vb2_queue *vq, if (*num_planes) { if (*num_planes != fmt->memplanes) return -EINVAL; - for (i = 0; i < *num_planes; i++) { + for (i = 0; i < *num_planes; i++) if (sizes[i] < (wh * fmt->depth[i]) / 8) return -EINVAL; - allocators[i] = isp->alloc_ctx; - } return 0; } *num_planes = fmt->memplanes; - for (i = 0; i < fmt->memplanes; i++) { + for (i = 0; i < fmt->memplanes; i++) sizes[i] = (wh * fmt->depth[i]) / 8; - allocators[i] = isp->alloc_ctx; - } return 0; } @@ -597,6 +593,7 @@ int fimc_isp_video_device_register(struct fimc_isp *isp, q->drv_priv = isp; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &isp->video_lock; + q->dev = &isp->pdev->dev; ret = vb2_queue_init(q); if (ret < 0) diff --git a/drivers/media/platform/exynos4-is/fimc-isp.h b/drivers/media/platform/exynos4-is/fimc-isp.h index e0686b5f1bf8..3cdd52491294 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp.h +++ b/drivers/media/platform/exynos4-is/fimc-isp.h @@ -148,7 +148,6 @@ struct fimc_is_video { /** * struct fimc_isp - FIMC-IS ISP data structure * @pdev: pointer to FIMC-IS platform device - * @alloc_ctx: videobuf2 memory allocator context * @subdev: ISP v4l2_subdev * @subdev_pads: the ISP subdev media pads * @test_pattern: test pattern controls @@ -161,7 +160,6 @@ struct fimc_is_video { */ struct fimc_isp { struct 
platform_device *pdev; - struct vb2_alloc_ctx *alloc_ctx; struct v4l2_subdev subdev; struct media_pad subdev_pads[FIMC_ISP_SD_PADS_NUM]; struct v4l2_mbus_framefmt src_fmt; diff --git a/drivers/media/platform/exynos4-is/fimc-lite.c b/drivers/media/platform/exynos4-is/fimc-lite.c index dc1b929f7a33..a0f149fb88e1 100644 --- a/drivers/media/platform/exynos4-is/fimc-lite.c +++ b/drivers/media/platform/exynos4-is/fimc-lite.c @@ -357,7 +357,7 @@ static void stop_streaming(struct vb2_queue *q) static int queue_setup(struct vb2_queue *vq, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *allocators[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct fimc_lite *fimc = vq->drv_priv; struct flite_frame *frame = &fimc->out_frame; @@ -371,20 +371,16 @@ static int queue_setup(struct vb2_queue *vq, if (*num_planes) { if (*num_planes != fmt->memplanes) return -EINVAL; - for (i = 0; i < *num_planes; i++) { + for (i = 0; i < *num_planes; i++) if (sizes[i] < (wh * fmt->depth[i]) / 8) return -EINVAL; - allocators[i] = fimc->alloc_ctx; - } return 0; } *num_planes = fmt->memplanes; - for (i = 0; i < fmt->memplanes; i++) { + for (i = 0; i < fmt->memplanes; i++) sizes[i] = (wh * fmt->depth[i]) / 8; - allocators[i] = fimc->alloc_ctx; - } return 0; } @@ -1300,6 +1296,7 @@ static int fimc_lite_subdev_registered(struct v4l2_subdev *sd) q->drv_priv = fimc; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &fimc->lock; + q->dev = &fimc->pdev->dev; ret = vb2_queue_init(q); if (ret < 0) @@ -1551,11 +1548,7 @@ static int fimc_lite_probe(struct platform_device *pdev) goto err_sd; } - fimc->alloc_ctx = vb2_dma_contig_init_ctx(dev); - if (IS_ERR(fimc->alloc_ctx)) { - ret = PTR_ERR(fimc->alloc_ctx); - goto err_clk_dis; - } + vb2_dma_contig_set_max_seg_size(dev, DMA_BIT_MASK(32)); fimc_lite_set_default_config(fimc); @@ -1563,9 +1556,6 @@ static int fimc_lite_probe(struct platform_device *pdev) fimc->index); return 0; -err_clk_dis: - if (!pm_runtime_enabled(dev)) - clk_disable(fimc->clock); err_sd: fimc_lite_unregister_capture_subdev(fimc); err_clk_put: @@ -1651,7 +1641,7 @@ static int fimc_lite_remove(struct platform_device *pdev) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); fimc_lite_unregister_capture_subdev(fimc); - vb2_dma_contig_cleanup_ctx(fimc->alloc_ctx); + vb2_dma_contig_clear_max_seg_size(dev); fimc_lite_clk_put(fimc); dev_info(dev, "Driver unloaded\n"); diff --git a/drivers/media/platform/exynos4-is/fimc-lite.h b/drivers/media/platform/exynos4-is/fimc-lite.h index 11690d563e06..9ae1e96a1bc7 100644 --- a/drivers/media/platform/exynos4-is/fimc-lite.h +++ b/drivers/media/platform/exynos4-is/fimc-lite.h @@ -113,7 +113,6 @@ struct flite_buffer { * @ve: exynos video device entity structure * @v4l2_dev: pointer to top the level v4l2_device * @fh: v4l2 file handle - * @alloc_ctx: videobuf2 memory allocator context * @subdev: FIMC-LITE subdev * @vd_pad: media (sink) pad for the capture video node * @subdev_pads: the subdev media pads @@ -148,7 +147,6 @@ struct fimc_lite { struct exynos_video_entity ve; struct v4l2_device *v4l2_dev; struct v4l2_fh fh; - struct vb2_alloc_ctx *alloc_ctx; struct v4l2_subdev subdev; struct media_pad vd_pad; struct media_pad subdev_pads[FLITE_SD_PADS_NUM]; diff --git a/drivers/media/platform/exynos4-is/fimc-m2m.c b/drivers/media/platform/exynos4-is/fimc-m2m.c index 55ec4c99d484..b1309e114edb 100644 --- a/drivers/media/platform/exynos4-is/fimc-m2m.c +++ b/drivers/media/platform/exynos4-is/fimc-m2m.c @@ -50,30 +50,28 @@ void 
fimc_m2m_job_finish(struct fimc_ctx *ctx, int vb_state) src_vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); dst_vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); - if (src_vb && dst_vb) { + if (src_vb) v4l2_m2m_buf_done(src_vb, vb_state); + if (dst_vb) v4l2_m2m_buf_done(dst_vb, vb_state); + if (src_vb && dst_vb) v4l2_m2m_job_finish(ctx->fimc_dev->m2m.m2m_dev, ctx->fh.m2m_ctx); - } } /* Complete the transaction which has been scheduled for execution. */ -static int fimc_m2m_shutdown(struct fimc_ctx *ctx) +static void fimc_m2m_shutdown(struct fimc_ctx *ctx) { struct fimc_dev *fimc = ctx->fimc_dev; - int ret; if (!fimc_m2m_pending(fimc)) - return 0; + return; fimc_ctx_state_set(FIMC_CTX_SHUT, ctx); - ret = wait_event_timeout(fimc->irq_queue, - !fimc_ctx_state_is_set(FIMC_CTX_SHUT, ctx), - FIMC_SHUTDOWN_TIMEOUT); - - return ret == 0 ? -ETIMEDOUT : ret; + wait_event_timeout(fimc->irq_queue, + !fimc_ctx_state_is_set(FIMC_CTX_SHUT, ctx), + FIMC_SHUTDOWN_TIMEOUT); } static int start_streaming(struct vb2_queue *q, unsigned int count) @@ -88,12 +86,10 @@ static int start_streaming(struct vb2_queue *q, unsigned int count) static void stop_streaming(struct vb2_queue *q) { struct fimc_ctx *ctx = q->drv_priv; - int ret; - ret = fimc_m2m_shutdown(ctx); - if (ret == -ETIMEDOUT) - fimc_m2m_job_finish(ctx, VB2_BUF_STATE_ERROR); + fimc_m2m_shutdown(ctx); + fimc_m2m_job_finish(ctx, VB2_BUF_STATE_ERROR); pm_runtime_put(&ctx->fimc_dev->pdev->dev); } @@ -178,7 +174,7 @@ static void fimc_job_abort(void *priv) static int fimc_queue_setup(struct vb2_queue *vq, unsigned int *num_buffers, unsigned int *num_planes, - unsigned int sizes[], void *allocators[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct fimc_ctx *ctx = vb2_get_drv_priv(vq); struct fimc_frame *f; @@ -195,10 +191,8 @@ static int fimc_queue_setup(struct vb2_queue *vq, return -EINVAL; *num_planes = f->fmt->memplanes; - for (i = 0; i < f->fmt->memplanes; i++) { + for (i = 0; i < f->fmt->memplanes; i++) sizes[i] = f->payload[i]; - allocators[i] = ctx->fimc_dev->alloc_ctx; - } return 0; } @@ -562,6 +556,7 @@ static int queue_init(void *priv, struct vb2_queue *src_vq, src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer); src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; src_vq->lock = &ctx->fimc_dev->lock; + src_vq->dev = &ctx->fimc_dev->pdev->dev; ret = vb2_queue_init(src_vq); if (ret) @@ -575,6 +570,7 @@ static int queue_init(void *priv, struct vb2_queue *src_vq, dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer); dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; dst_vq->lock = &ctx->fimc_dev->lock; + dst_vq->dev = &ctx->fimc_dev->pdev->dev; return vb2_queue_init(dst_vq); } diff --git a/drivers/media/platform/exynos4-is/mipi-csis.c b/drivers/media/platform/exynos4-is/mipi-csis.c index bf954424e7be..86e681daa89d 100644 --- a/drivers/media/platform/exynos4-is/mipi-csis.c +++ b/drivers/media/platform/exynos4-is/mipi-csis.c @@ -649,23 +649,6 @@ static int s5pcsis_log_status(struct v4l2_subdev *sd) return 0; } -static int s5pcsis_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh) -{ - struct v4l2_mbus_framefmt *format = v4l2_subdev_get_try_format(sd, fh->pad, 0); - - format->colorspace = V4L2_COLORSPACE_JPEG; - format->code = s5pcsis_formats[0].code; - format->width = S5PCSIS_DEF_PIX_WIDTH; - format->height = S5PCSIS_DEF_PIX_HEIGHT; - format->field = V4L2_FIELD_NONE; - - return 0; -} - -static const struct v4l2_subdev_internal_ops s5pcsis_sd_internal_ops = { - .open = s5pcsis_open, -}; - static struct v4l2_subdev_core_ops 
s5pcsis_core_ops = { .s_power = s5pcsis_s_power, .log_status = s5pcsis_log_status, diff --git a/drivers/media/platform/m2m-deinterlace.c b/drivers/media/platform/m2m-deinterlace.c index 7383818c2be6..0fcb5c78031d 100644 --- a/drivers/media/platform/m2m-deinterlace.c +++ b/drivers/media/platform/m2m-deinterlace.c @@ -136,7 +136,6 @@ struct deinterlace_dev { struct dma_chan *dma_chan; struct v4l2_m2m_dev *m2m_dev; - struct vb2_alloc_ctx *alloc_ctx; }; struct deinterlace_ctx { @@ -799,7 +798,7 @@ struct vb2_dc_conf { static int deinterlace_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct deinterlace_ctx *ctx = vb2_get_drv_priv(vq); struct deinterlace_q_data *q_data; @@ -820,8 +819,6 @@ static int deinterlace_queue_setup(struct vb2_queue *vq, *nbuffers = count; sizes[0] = size; - alloc_ctxs[0] = ctx->dev->alloc_ctx; - dprintk(ctx->dev, "get %d buffer(s) of size %d each.\n", count, size); return 0; @@ -874,6 +871,7 @@ static int queue_init(void *priv, struct vb2_queue *src_vq, src_vq->ops = &deinterlace_qops; src_vq->mem_ops = &vb2_dma_contig_memops; src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + src_vq->dev = ctx->dev->v4l2_dev.dev; q_data[V4L2_M2M_SRC].fmt = &formats[0]; q_data[V4L2_M2M_SRC].width = 640; q_data[V4L2_M2M_SRC].height = 480; @@ -891,6 +889,7 @@ static int queue_init(void *priv, struct vb2_queue *src_vq, dst_vq->ops = &deinterlace_qops; dst_vq->mem_ops = &vb2_dma_contig_memops; dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + dst_vq->dev = ctx->dev->v4l2_dev.dev; q_data[V4L2_M2M_DST].fmt = &formats[0]; q_data[V4L2_M2M_DST].width = 640; q_data[V4L2_M2M_DST].height = 480; @@ -1046,13 +1045,6 @@ static int deinterlace_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pcdev); - pcdev->alloc_ctx = vb2_dma_contig_init_ctx(&pdev->dev); - if (IS_ERR(pcdev->alloc_ctx)) { - v4l2_err(&pcdev->v4l2_dev, "Failed to alloc vb2 context\n"); - ret = PTR_ERR(pcdev->alloc_ctx); - goto err_ctx; - } - pcdev->m2m_dev = v4l2_m2m_init(&m2m_ops); if (IS_ERR(pcdev->m2m_dev)) { v4l2_err(&pcdev->v4l2_dev, "Failed to init mem2mem device\n"); @@ -1064,8 +1056,6 @@ static int deinterlace_probe(struct platform_device *pdev) err_m2m: video_unregister_device(&pcdev->vfd); -err_ctx: - vb2_dma_contig_cleanup_ctx(pcdev->alloc_ctx); unreg_dev: v4l2_device_unregister(&pcdev->v4l2_dev); rel_dma: @@ -1082,7 +1072,6 @@ static int deinterlace_remove(struct platform_device *pdev) v4l2_m2m_release(pcdev->m2m_dev); video_unregister_device(&pcdev->vfd); v4l2_device_unregister(&pcdev->v4l2_dev); - vb2_dma_contig_cleanup_ctx(pcdev->alloc_ctx); dma_release_channel(pcdev->dma_chan); return 0; diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c index 9b878deb1437..af59bf4dca2d 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.c +++ b/drivers/media/platform/marvell-ccic/mcam-core.c @@ -973,7 +973,7 @@ static int mcam_cam_set_flip(struct mcam_camera *cam) memset(&ctrl, 0, sizeof(ctrl)); ctrl.id = V4L2_CID_VFLIP; ctrl.value = flip; - return sensor_call(cam, core, s_ctrl, &ctrl); + return v4l2_s_ctrl(NULL, cam->sensor->ctrl_handler, &ctrl); } @@ -1051,7 +1051,7 @@ static int mcam_read_setup(struct mcam_camera *cam) static int mcam_vb_queue_setup(struct vb2_queue *vq, unsigned int *nbufs, unsigned int *num_planes, unsigned int sizes[], - void *alloc_ctxs[]) + struct device *alloc_devs[]) { struct 
mcam_camera *cam = vb2_get_drv_priv(vq); int minbufs = (cam->buffer_mode == B_DMA_contig) ? 3 : 2; @@ -1059,10 +1059,6 @@ static int mcam_vb_queue_setup(struct vb2_queue *vq, if (*nbufs < minbufs) *nbufs = minbufs; - if (cam->buffer_mode == B_DMA_contig) - alloc_ctxs[0] = cam->vb_alloc_ctx; - else if (cam->buffer_mode == B_DMA_sg) - alloc_ctxs[0] = cam->vb_alloc_ctx_sg; if (*num_planes) return sizes[0] < size ? -EINVAL : 0; @@ -1271,6 +1267,7 @@ static int mcam_setup_vb2(struct mcam_camera *cam) vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF | VB2_READ; vq->buf_struct_size = sizeof(struct mcam_vb_buffer); + vq->dev = cam->dev; INIT_LIST_HEAD(&cam->buffers); switch (cam->buffer_mode) { case B_DMA_contig: @@ -1279,9 +1276,6 @@ static int mcam_setup_vb2(struct mcam_camera *cam) vq->mem_ops = &vb2_dma_contig_memops; cam->dma_setup = mcam_ctlr_dma_contig; cam->frame_complete = mcam_dma_contig_done; - cam->vb_alloc_ctx = vb2_dma_contig_init_ctx(cam->dev); - if (IS_ERR(cam->vb_alloc_ctx)) - return PTR_ERR(cam->vb_alloc_ctx); #endif break; case B_DMA_sg: @@ -1290,9 +1284,6 @@ static int mcam_setup_vb2(struct mcam_camera *cam) vq->mem_ops = &vb2_dma_sg_memops; cam->dma_setup = mcam_ctlr_dma_sg; cam->frame_complete = mcam_dma_sg_done; - cam->vb_alloc_ctx_sg = vb2_dma_sg_init_ctx(cam->dev); - if (IS_ERR(cam->vb_alloc_ctx_sg)) - return PTR_ERR(cam->vb_alloc_ctx_sg); #endif break; case B_vmalloc: @@ -1309,18 +1300,6 @@ static int mcam_setup_vb2(struct mcam_camera *cam) return vb2_queue_init(vq); } -static void mcam_cleanup_vb2(struct mcam_camera *cam) -{ -#ifdef MCAM_MODE_DMA_CONTIG - if (cam->buffer_mode == B_DMA_contig) - vb2_dma_contig_cleanup_ctx(cam->vb_alloc_ctx); -#endif -#ifdef MCAM_MODE_DMA_SG - if (cam->buffer_mode == B_DMA_sg) - vb2_dma_sg_cleanup_ctx(cam->vb_alloc_ctx_sg); -#endif -} - /* ---------------------------------------------------------------------- */ /* @@ -1875,7 +1854,6 @@ void mccic_shutdown(struct mcam_camera *cam) cam_warn(cam, "Removing a device with users!\n"); mcam_ctlr_power_down(cam); } - mcam_cleanup_vb2(cam); if (cam->buffer_mode == B_vmalloc) mcam_free_dma_bufs(cam); video_unregister_device(&cam->vdev); diff --git a/drivers/media/platform/marvell-ccic/mcam-core.h b/drivers/media/platform/marvell-ccic/mcam-core.h index 35cd9e5aedf8..beb339f5561f 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.h +++ b/drivers/media/platform/marvell-ccic/mcam-core.h @@ -176,8 +176,6 @@ struct mcam_camera { /* DMA buffers - DMA modes */ struct mcam_vb_buffer *vb_bufs[MAX_DMA_BUFS]; - struct vb2_alloc_ctx *vb_alloc_ctx; - struct vb2_alloc_ctx *vb_alloc_ctx_sg; /* Mode-specific ops, set at open time */ void (*dma_setup)(struct mcam_camera *cam); diff --git a/drivers/media/platform/mtk-vcodec/Makefile b/drivers/media/platform/mtk-vcodec/Makefile new file mode 100644 index 000000000000..dc5cb006d600 --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/Makefile @@ -0,0 +1,19 @@ + + +obj-$(CONFIG_VIDEO_MEDIATEK_VCODEC) += mtk-vcodec-enc.o mtk-vcodec-common.o + + + +mtk-vcodec-enc-y := venc/venc_vp8_if.o \ + venc/venc_h264_if.o \ + mtk_vcodec_enc.o \ + mtk_vcodec_enc_drv.o \ + mtk_vcodec_enc_pm.o \ + venc_drv_if.o \ + venc_vpu_if.o \ + + +mtk-vcodec-common-y := mtk_vcodec_intr.o \ + mtk_vcodec_util.o\ + +ccflags-y += -I$(srctree)/drivers/media/platform/mtk-vpu diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h new file mode 100644 index 
000000000000..94f0a425be42 --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h @@ -0,0 +1,335 @@ +/* +* Copyright (c) 2016 MediaTek Inc. +* Author: PC Chen +* Tiffany Lin +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License version 2 as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +*/ + +#ifndef _MTK_VCODEC_DRV_H_ +#define _MTK_VCODEC_DRV_H_ + +#include +#include +#include +#include +#include +#include + +#include "mtk_vcodec_util.h" + +#define MTK_VCODEC_DRV_NAME "mtk_vcodec_drv" +#define MTK_VCODEC_ENC_NAME "mtk-vcodec-enc" +#define MTK_PLATFORM_STR "platform:mt8173" + + +#define MTK_VCODEC_MAX_PLANES 3 +#define MTK_V4L2_BENCHMARK 0 +#define WAIT_INTR_TIMEOUT_MS 1000 + +/** + * enum mtk_hw_reg_idx - MTK hw register base index + */ +enum mtk_hw_reg_idx { + VDEC_SYS, + VDEC_MISC, + VDEC_LD, + VDEC_TOP, + VDEC_CM, + VDEC_AD, + VDEC_AV, + VDEC_PP, + VDEC_HWD, + VDEC_HWQ, + VDEC_HWB, + VDEC_HWG, + NUM_MAX_VDEC_REG_BASE, + /* h264 encoder */ + VENC_SYS = NUM_MAX_VDEC_REG_BASE, + /* vp8 encoder */ + VENC_LT_SYS, + NUM_MAX_VCODEC_REG_BASE +}; + +/** + * enum mtk_instance_type - The type of an MTK Vcodec instance. + */ +enum mtk_instance_type { + MTK_INST_DECODER = 0, + MTK_INST_ENCODER = 1, +}; + +/** + * enum mtk_instance_state - The state of an MTK Vcodec instance. + * @MTK_STATE_FREE - default state when instance is created + * @MTK_STATE_INIT - vcodec instance is initialized + * @MTK_STATE_HEADER - vdec had sps/pps header parsed or venc + * had sps/pps header encoded + * @MTK_STATE_FLUSH - vdec is flushing. 
Only used by decoder + * @MTK_STATE_ABORT - vcodec should be aborted +*/ +enum mtk_instance_state { + MTK_STATE_FREE = 0, + MTK_STATE_INIT = 1, + MTK_STATE_HEADER = 2, + MTK_STATE_FLUSH = 3, + MTK_STATE_ABORT = 4, +}; + +/** + * enum mtk_encode_param - General encoding parameters type + */ +enum mtk_encode_param { + MTK_ENCODE_PARAM_NONE = 0, + MTK_ENCODE_PARAM_BITRATE = (1 << 0), + MTK_ENCODE_PARAM_FRAMERATE = (1 << 1), + MTK_ENCODE_PARAM_INTRA_PERIOD = (1 << 2), + MTK_ENCODE_PARAM_FORCE_INTRA = (1 << 3), + MTK_ENCODE_PARAM_GOP_SIZE = (1 << 4), +}; + +enum mtk_fmt_type { + MTK_FMT_DEC = 0, + MTK_FMT_ENC = 1, + MTK_FMT_FRAME = 2, +}; + +/** + * struct mtk_video_fmt - Structure used to store information about pixelformats + */ +struct mtk_video_fmt { + u32 fourcc; + enum mtk_fmt_type type; + u32 num_planes; +}; + +/** + * struct mtk_codec_framesizes - Structure used to store information about + * framesizes + */ +struct mtk_codec_framesizes { + u32 fourcc; + struct v4l2_frmsize_stepwise stepwise; +}; + +/** + * enum mtk_q_type - Type of queue + */ +enum mtk_q_type { + MTK_Q_DATA_SRC = 0, + MTK_Q_DATA_DST = 1, +}; + +/** + * struct mtk_q_data - Structure used to store information about queue + */ +struct mtk_q_data { + unsigned int visible_width; + unsigned int visible_height; + unsigned int coded_width; + unsigned int coded_height; + enum v4l2_field field; + unsigned int bytesperline[MTK_VCODEC_MAX_PLANES]; + unsigned int sizeimage[MTK_VCODEC_MAX_PLANES]; + struct mtk_video_fmt *fmt; +}; + +/** + * struct mtk_enc_params - General encoding parameters + * @bitrate: target bitrate in bits per second + * @num_b_frame: number of B frames between P frames + * @rc_frame: frame based rate control + * @rc_mb: macroblock based rate control + * @seq_hdr_mode: H.264 sequence header is encoded separately or joined + * with the first frame + * @intra_period: I frame period + * @gop_size: group of pictures size; it's used as the intra frame period + * @framerate_num: frame rate numerator. ex: framerate_num=30 and + * framerate_denom=1 means FPS is 30 + * @framerate_denom: frame rate denominator. ex: framerate_num=30 and + * framerate_denom=1 means FPS is 30 + * @h264_max_qp: Max value for H.264 quantization parameter + * @h264_profile: V4L2 defined H.264 profile + * @h264_level: V4L2 defined H.264 level + * @force_intra: force/insert intra frame + */ +struct mtk_enc_params { + unsigned int bitrate; + unsigned int num_b_frame; + unsigned int rc_frame; + unsigned int rc_mb; + unsigned int seq_hdr_mode; + unsigned int intra_period; + unsigned int gop_size; + unsigned int framerate_num; + unsigned int framerate_denom; + unsigned int h264_max_qp; + unsigned int h264_profile; + unsigned int h264_level; + unsigned int force_intra; +}; + +/** + * struct mtk_vcodec_pm - Power management data structure + */ +struct mtk_vcodec_pm { + struct clk *vcodecpll; + struct clk *univpll_d2; + struct clk *clk_cci400_sel; + struct clk *vdecpll; + struct clk *vdec_sel; + struct clk *vencpll_d2; + struct clk *venc_sel; + struct clk *univpll1_d2; + struct clk *venc_lt_sel; + struct device *larbvdec; + struct device *larbvenc; + struct device *larbvenclt; + struct device *dev; + struct mtk_vcodec_dev *mtkdev; +}; + +/** + * struct mtk_vcodec_ctx - Context (instance) private data. 
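+ * + * (A context is allocated per open file handle in fops_vcodec_open(), linked on the device's ctx_list, and torn down again in fops_vcodec_release(); both are defined further below.)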
+ * + * @type: type of the instance - decoder or encoder + * @dev: pointer to the mtk_vcodec_dev of the device + * @list: link to ctx_list of mtk_vcodec_dev + * @fh: struct v4l2_fh + * @m2m_ctx: pointer to the v4l2_m2m_ctx of the context + * @q_data: store information of input and output queue + * of the context + * @id: index of the context that this structure describes + * @state: state of the context + * @param_change: indicates which encode parameters have changed + * @enc_params: encoding parameters + * @enc_if: hooked encoder driver interface + * @drv_handle: driver handle for specific decode/encode instance + * + * @int_cond: variable used by the waitqueue + * @int_type: type of the last interrupt + * @queue: waitqueue that can be used to wait for this context to + * finish + * @irq_status: irq status + * + * @ctrl_hdl: handler for v4l2 framework + * @encode_work: worker for the encoding + * + * @colorspace: enum v4l2_colorspace; supplemental to pixelformat + * @ycbcr_enc: enum v4l2_ycbcr_encoding, Y'CbCr encoding + * @quantization: enum v4l2_quantization, colorspace quantization + * @xfer_func: enum v4l2_xfer_func, colorspace transfer function + */ +struct mtk_vcodec_ctx { + enum mtk_instance_type type; + struct mtk_vcodec_dev *dev; + struct list_head list; + + struct v4l2_fh fh; + struct v4l2_m2m_ctx *m2m_ctx; + struct mtk_q_data q_data[2]; + int id; + enum mtk_instance_state state; + enum mtk_encode_param param_change; + struct mtk_enc_params enc_params; + + struct venc_common_if *enc_if; + unsigned long drv_handle; + + int int_cond; + int int_type; + wait_queue_head_t queue; + unsigned int irq_status; + + struct v4l2_ctrl_handler ctrl_hdl; + struct work_struct encode_work; + + enum v4l2_colorspace colorspace; + enum v4l2_ycbcr_encoding ycbcr_enc; + enum v4l2_quantization quantization; + enum v4l2_xfer_func xfer_func; +}; + +/** + * struct mtk_vcodec_dev - driver data + * @v4l2_dev: V4L2 device to register video devices for. + * @vfd_enc: Video device for encoder. + * + * @m2m_dev_enc: m2m device for encoder. + * @plat_dev: platform device + * @vpu_plat_dev: mtk vpu platform device + * @ctx_list: list of struct mtk_vcodec_ctx + * @irqlock: protect data access by irq handler and work thread + * @curr_ctx: The context that is waiting for codec hardware + * + * @reg_base: Mapped address of MTK Vcodec registers. + * + * @id_counter: used to identify current opened instance + * @num_instances: counter of active MTK Vcodec instances + * + * @encode_workqueue: encode work queue + * + * @int_cond: used to identify whether an interrupt condition happened + * @int_type: used to identify what kind of interrupt condition happened + * @dev_mutex: video_device lock + * @queue: waitqueue for waiting for completion of device commands + * + * @enc_irq: h264 encoder irq resource + * @enc_lt_irq: vp8 encoder irq resource + * + * @enc_mutex: encoder hardware lock. 
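+ * (of the three locks, dev_mutex serializes the v4l2 ioctls and instance open/release, enc_mutex serializes access to the encode hardware itself, and irqlock protects curr_ctx against the two interrupt handlers)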
+ * + * @pm: power management control + * @dec_capability: used to identify decode capability, ex: 4k + * @enc_capability: used to identify encode capability + */ +struct mtk_vcodec_dev { + struct v4l2_device v4l2_dev; + struct video_device *vfd_enc; + + struct v4l2_m2m_dev *m2m_dev_enc; + struct platform_device *plat_dev; + struct platform_device *vpu_plat_dev; + struct list_head ctx_list; + spinlock_t irqlock; + struct mtk_vcodec_ctx *curr_ctx; + void __iomem *reg_base[NUM_MAX_VCODEC_REG_BASE]; + + unsigned long id_counter; + int num_instances; + + struct workqueue_struct *encode_workqueue; + + int int_cond; + int int_type; + struct mutex dev_mutex; + wait_queue_head_t queue; + + int enc_irq; + int enc_lt_irq; + + struct mutex enc_mutex; + + struct mtk_vcodec_pm pm; + unsigned int dec_capability; + unsigned int enc_capability; +}; + +static inline struct mtk_vcodec_ctx *fh_to_ctx(struct v4l2_fh *fh) +{ + return container_of(fh, struct mtk_vcodec_ctx, fh); +} + +static inline struct mtk_vcodec_ctx *ctrl_to_ctx(struct v4l2_ctrl *ctrl) +{ + return container_of(ctrl->handler, struct mtk_vcodec_ctx, ctrl_hdl); +} + +#endif /* _MTK_VCODEC_DRV_H_ */ diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c new file mode 100644 index 000000000000..3ed3f2d31df5 --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -0,0 +1,1292 @@ +/* +* Copyright (c) 2016 MediaTek Inc. +* Author: PC Chen +* Tiffany Lin +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License version 2 as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+*/ + +#include +#include +#include +#include + +#include "mtk_vcodec_drv.h" +#include "mtk_vcodec_enc.h" +#include "mtk_vcodec_intr.h" +#include "mtk_vcodec_util.h" +#include "venc_drv_if.h" + +#define MTK_VENC_MIN_W 160U +#define MTK_VENC_MIN_H 128U +#define MTK_VENC_MAX_W 1920U +#define MTK_VENC_MAX_H 1088U +#define DFT_CFG_WIDTH MTK_VENC_MIN_W +#define DFT_CFG_HEIGHT MTK_VENC_MIN_H +#define MTK_MAX_CTRLS_HINT 20 +#define OUT_FMT_IDX 0 +#define CAP_FMT_IDX 4 + + +static void mtk_venc_worker(struct work_struct *work); + +static struct mtk_video_fmt mtk_video_formats[] = { + { + .fourcc = V4L2_PIX_FMT_NV12M, + .type = MTK_FMT_FRAME, + .num_planes = 2, + }, + { + .fourcc = V4L2_PIX_FMT_NV21M, + .type = MTK_FMT_FRAME, + .num_planes = 2, + }, + { + .fourcc = V4L2_PIX_FMT_YUV420M, + .type = MTK_FMT_FRAME, + .num_planes = 3, + }, + { + .fourcc = V4L2_PIX_FMT_YVU420M, + .type = MTK_FMT_FRAME, + .num_planes = 3, + }, + { + .fourcc = V4L2_PIX_FMT_H264, + .type = MTK_FMT_ENC, + .num_planes = 1, + }, + { + .fourcc = V4L2_PIX_FMT_VP8, + .type = MTK_FMT_ENC, + .num_planes = 1, + }, +}; + +#define NUM_FORMATS ARRAY_SIZE(mtk_video_formats) + +static const struct mtk_codec_framesizes mtk_venc_framesizes[] = { + { + .fourcc = V4L2_PIX_FMT_H264, + .stepwise = { MTK_VENC_MIN_W, MTK_VENC_MAX_W, 16, + MTK_VENC_MIN_H, MTK_VENC_MAX_H, 16 }, + }, + { + .fourcc = V4L2_PIX_FMT_VP8, + .stepwise = { MTK_VENC_MIN_W, MTK_VENC_MAX_W, 16, + MTK_VENC_MIN_H, MTK_VENC_MAX_H, 16 }, + }, +}; + +#define NUM_SUPPORTED_FRAMESIZE ARRAY_SIZE(mtk_venc_framesizes) + +static int vidioc_venc_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct mtk_vcodec_ctx *ctx = ctrl_to_ctx(ctrl); + struct mtk_enc_params *p = &ctx->enc_params; + int ret = 0; + + switch (ctrl->id) { + case V4L2_CID_MPEG_VIDEO_BITRATE: + mtk_v4l2_debug(2, "V4L2_CID_MPEG_VIDEO_BITRATE val = %d", + ctrl->val); + p->bitrate = ctrl->val; + ctx->param_change |= MTK_ENCODE_PARAM_BITRATE; + break; + case V4L2_CID_MPEG_VIDEO_B_FRAMES: + mtk_v4l2_debug(2, "V4L2_CID_MPEG_VIDEO_B_FRAMES val = %d", + ctrl->val); + p->num_b_frame = ctrl->val; + break; + case V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE: + mtk_v4l2_debug(2, "V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE val = %d", + ctrl->val); + p->rc_frame = ctrl->val; + break; + case V4L2_CID_MPEG_VIDEO_H264_MAX_QP: + mtk_v4l2_debug(2, "V4L2_CID_MPEG_VIDEO_H264_MAX_QP val = %d", + ctrl->val); + p->h264_max_qp = ctrl->val; + break; + case V4L2_CID_MPEG_VIDEO_HEADER_MODE: + mtk_v4l2_debug(2, "V4L2_CID_MPEG_VIDEO_HEADER_MODE val = %d", + ctrl->val); + p->seq_hdr_mode = ctrl->val; + break; + case V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE: + mtk_v4l2_debug(2, "V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE val = %d", + ctrl->val); + p->rc_mb = ctrl->val; + break; + case V4L2_CID_MPEG_VIDEO_H264_PROFILE: + mtk_v4l2_debug(2, "V4L2_CID_MPEG_VIDEO_H264_PROFILE val = %d", + ctrl->val); + p->h264_profile = ctrl->val; + break; + case V4L2_CID_MPEG_VIDEO_H264_LEVEL: + mtk_v4l2_debug(2, "V4L2_CID_MPEG_VIDEO_H264_LEVEL val = %d", + ctrl->val); + p->h264_level = ctrl->val; + break; + case V4L2_CID_MPEG_VIDEO_H264_I_PERIOD: + mtk_v4l2_debug(2, "V4L2_CID_MPEG_VIDEO_H264_I_PERIOD val = %d", + ctrl->val); + p->intra_period = ctrl->val; + ctx->param_change |= MTK_ENCODE_PARAM_INTRA_PERIOD; + break; + case V4L2_CID_MPEG_VIDEO_GOP_SIZE: + mtk_v4l2_debug(2, "V4L2_CID_MPEG_VIDEO_GOP_SIZE val = %d", + ctrl->val); + p->gop_size = ctrl->val; + ctx->param_change |= MTK_ENCODE_PARAM_GOP_SIZE; + break; + case V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME: + mtk_v4l2_debug(2, "V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME"); + 
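+ /* + * V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME is a button control: there is + * no value to latch, any write simply requests an intra frame for + * the next queued OUTPUT buffer. + */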
p->force_intra = 1; + ctx->param_change |= MTK_ENCODE_PARAM_FORCE_INTRA; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct v4l2_ctrl_ops mtk_vcodec_enc_ctrl_ops = { + .s_ctrl = vidioc_venc_s_ctrl, +}; + +static int vidioc_enum_fmt(struct v4l2_fmtdesc *f, bool output_queue) +{ + struct mtk_video_fmt *fmt; + int i, j = 0; + + for (i = 0; i < NUM_FORMATS; ++i) { + if (output_queue && mtk_video_formats[i].type != MTK_FMT_FRAME) + continue; + if (!output_queue && mtk_video_formats[i].type != MTK_FMT_ENC) + continue; + + if (j == f->index) { + fmt = &mtk_video_formats[i]; + f->pixelformat = fmt->fourcc; + memset(f->reserved, 0, sizeof(f->reserved)); + return 0; + } + ++j; + } + + return -EINVAL; +} + +static int vidioc_enum_framesizes(struct file *file, void *fh, + struct v4l2_frmsizeenum *fsize) +{ + int i = 0; + + if (fsize->index != 0) + return -EINVAL; + + for (i = 0; i < NUM_SUPPORTED_FRAMESIZE; ++i) { + if (fsize->pixel_format != mtk_venc_framesizes[i].fourcc) + continue; + + fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; + fsize->stepwise = mtk_venc_framesizes[i].stepwise; + return 0; + } + + return -EINVAL; +} + +static int vidioc_enum_fmt_vid_cap_mplane(struct file *file, void *pirv, + struct v4l2_fmtdesc *f) +{ + return vidioc_enum_fmt(f, false); +} + +static int vidioc_enum_fmt_vid_out_mplane(struct file *file, void *prov, + struct v4l2_fmtdesc *f) +{ + return vidioc_enum_fmt(f, true); +} + +static int vidioc_venc_querycap(struct file *file, void *priv, + struct v4l2_capability *cap) +{ + strlcpy(cap->driver, MTK_VCODEC_ENC_NAME, sizeof(cap->driver)); + strlcpy(cap->bus_info, MTK_PLATFORM_STR, sizeof(cap->bus_info)); + strlcpy(cap->card, MTK_PLATFORM_STR, sizeof(cap->card)); + + return 0; +} + +static int vidioc_venc_s_parm(struct file *file, void *priv, + struct v4l2_streamparm *a) +{ + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); + + if (a->type != V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) + return -EINVAL; + + ctx->enc_params.framerate_num = + a->parm.output.timeperframe.denominator; + ctx->enc_params.framerate_denom = + a->parm.output.timeperframe.numerator; + ctx->param_change |= MTK_ENCODE_PARAM_FRAMERATE; + + return 0; +} + +static int vidioc_venc_g_parm(struct file *file, void *priv, + struct v4l2_streamparm *a) +{ + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); + + if (a->type != V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) + return -EINVAL; + + a->parm.output.timeperframe.denominator = + ctx->enc_params.framerate_num; + a->parm.output.timeperframe.numerator = + ctx->enc_params.framerate_denom; + + return 0; +} + +static struct mtk_q_data *mtk_venc_get_q_data(struct mtk_vcodec_ctx *ctx, + enum v4l2_buf_type type) +{ + if (V4L2_TYPE_IS_OUTPUT(type)) + return &ctx->q_data[MTK_Q_DATA_SRC]; + + return &ctx->q_data[MTK_Q_DATA_DST]; +} + +static struct mtk_video_fmt *mtk_venc_find_format(struct v4l2_format *f) +{ + struct mtk_video_fmt *fmt; + unsigned int k; + + for (k = 0; k < NUM_FORMATS; k++) { + fmt = &mtk_video_formats[k]; + if (fmt->fourcc == f->fmt.pix.pixelformat) + return fmt; + } + + return NULL; +} + +/* V4L2 specification suggests the driver corrects the format struct if any of + * the dimensions is unsupported + */ +static int vidioc_try_fmt(struct v4l2_format *f, struct mtk_video_fmt *fmt) +{ + struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp; + int i; + + pix_fmt_mp->field = V4L2_FIELD_NONE; + + if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) { + pix_fmt_mp->num_planes = 1; + pix_fmt_mp->plane_fmt[0].bytesperline = 0; + } else if 
(f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { + int tmp_w, tmp_h; + + pix_fmt_mp->height = clamp(pix_fmt_mp->height, + MTK_VENC_MIN_H, + MTK_VENC_MAX_H); + pix_fmt_mp->width = clamp(pix_fmt_mp->width, + MTK_VENC_MIN_W, + MTK_VENC_MAX_W); + + /* find next closer width align 16, height align 32, size align + * 64 rectangle + */ + tmp_w = pix_fmt_mp->width; + tmp_h = pix_fmt_mp->height; + v4l_bound_align_image(&pix_fmt_mp->width, + MTK_VENC_MIN_W, + MTK_VENC_MAX_W, 4, + &pix_fmt_mp->height, + MTK_VENC_MIN_H, + MTK_VENC_MAX_H, 5, 6); + + if (pix_fmt_mp->width < tmp_w && + (pix_fmt_mp->width + 16) <= MTK_VENC_MAX_W) + pix_fmt_mp->width += 16; + if (pix_fmt_mp->height < tmp_h && + (pix_fmt_mp->height + 32) <= MTK_VENC_MAX_H) + pix_fmt_mp->height += 32; + + mtk_v4l2_debug(0, + "before resize width=%d, height=%d, after resize width=%d, height=%d, sizeimage=%d %d", + tmp_w, tmp_h, pix_fmt_mp->width, + pix_fmt_mp->height, + pix_fmt_mp->plane_fmt[0].sizeimage, + pix_fmt_mp->plane_fmt[1].sizeimage); + + pix_fmt_mp->num_planes = fmt->num_planes; + pix_fmt_mp->plane_fmt[0].sizeimage = + pix_fmt_mp->width * pix_fmt_mp->height + + ((ALIGN(pix_fmt_mp->width, 16) * 2) * 16); + pix_fmt_mp->plane_fmt[0].bytesperline = pix_fmt_mp->width; + + if (pix_fmt_mp->num_planes == 2) { + pix_fmt_mp->plane_fmt[1].sizeimage = + (pix_fmt_mp->width * pix_fmt_mp->height) / 2 + + (ALIGN(pix_fmt_mp->width, 16) * 16); + pix_fmt_mp->plane_fmt[2].sizeimage = 0; + pix_fmt_mp->plane_fmt[1].bytesperline = + pix_fmt_mp->width; + pix_fmt_mp->plane_fmt[2].bytesperline = 0; + } else if (pix_fmt_mp->num_planes == 3) { + pix_fmt_mp->plane_fmt[1].sizeimage = + pix_fmt_mp->plane_fmt[2].sizeimage = + (pix_fmt_mp->width * pix_fmt_mp->height) / 4 + + ((ALIGN(pix_fmt_mp->width, 16) / 2) * 16); + pix_fmt_mp->plane_fmt[1].bytesperline = + pix_fmt_mp->plane_fmt[2].bytesperline = + pix_fmt_mp->width / 2; + } + } + + for (i = 0; i < pix_fmt_mp->num_planes; i++) + memset(&(pix_fmt_mp->plane_fmt[i].reserved[0]), 0x0, + sizeof(pix_fmt_mp->plane_fmt[0].reserved)); + + pix_fmt_mp->flags = 0; + memset(&pix_fmt_mp->reserved, 0x0, + sizeof(pix_fmt_mp->reserved)); + + return 0; +} + +static void mtk_venc_set_param(struct mtk_vcodec_ctx *ctx, + struct venc_enc_param *param) +{ + struct mtk_q_data *q_data_src = &ctx->q_data[MTK_Q_DATA_SRC]; + struct mtk_enc_params *enc_params = &ctx->enc_params; + + switch (q_data_src->fmt->fourcc) { + case V4L2_PIX_FMT_YUV420M: + param->input_yuv_fmt = VENC_YUV_FORMAT_I420; + break; + case V4L2_PIX_FMT_YVU420M: + param->input_yuv_fmt = VENC_YUV_FORMAT_YV12; + break; + case V4L2_PIX_FMT_NV12M: + param->input_yuv_fmt = VENC_YUV_FORMAT_NV12; + break; + case V4L2_PIX_FMT_NV21M: + param->input_yuv_fmt = VENC_YUV_FORMAT_NV21; + break; + default: + mtk_v4l2_err("Unsupported fourcc=%d", q_data_src->fmt->fourcc); + break; + } + param->h264_profile = enc_params->h264_profile; + param->h264_level = enc_params->h264_level; + + /* Config visible resolution */ + param->width = q_data_src->visible_width; + param->height = q_data_src->visible_height; + /* Config coded resolution */ + param->buf_width = q_data_src->coded_width; + param->buf_height = q_data_src->coded_height; + param->frm_rate = enc_params->framerate_num / + enc_params->framerate_denom; + param->intra_period = enc_params->intra_period; + param->gop_size = enc_params->gop_size; + param->bitrate = enc_params->bitrate; + + mtk_v4l2_debug(0, + "fmt 0x%x, P/L %d/%d, w/h %d/%d, buf %d/%d, fps/bps %d/%d, gop %d, i_period %d", + param->input_yuv_fmt, param->h264_profile, + 
param->h264_level, param->width, param->height, + param->buf_width, param->buf_height, + param->frm_rate, param->bitrate, + param->gop_size, param->intra_period); +} + +static int vidioc_venc_s_fmt_cap(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); + struct vb2_queue *vq; + struct mtk_q_data *q_data; + int i, ret; + struct mtk_video_fmt *fmt; + + vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type); + if (!vq) { + mtk_v4l2_err("fail to get vq"); + return -EINVAL; + } + + if (vb2_is_busy(vq)) { + mtk_v4l2_err("queue busy"); + return -EBUSY; + } + + q_data = mtk_venc_get_q_data(ctx, f->type); + if (!q_data) { + mtk_v4l2_err("fail to get q data"); + return -EINVAL; + } + + fmt = mtk_venc_find_format(f); + if (!fmt) { + f->fmt.pix.pixelformat = mtk_video_formats[CAP_FMT_IDX].fourcc; + fmt = mtk_venc_find_format(f); + } + + q_data->fmt = fmt; + ret = vidioc_try_fmt(f, q_data->fmt); + if (ret) + return ret; + + q_data->coded_width = f->fmt.pix_mp.width; + q_data->coded_height = f->fmt.pix_mp.height; + q_data->field = f->fmt.pix_mp.field; + + for (i = 0; i < f->fmt.pix_mp.num_planes; i++) { + struct v4l2_plane_pix_format *plane_fmt; + + plane_fmt = &f->fmt.pix_mp.plane_fmt[i]; + q_data->bytesperline[i] = plane_fmt->bytesperline; + q_data->sizeimage[i] = plane_fmt->sizeimage; + } + + if (ctx->state == MTK_STATE_FREE) { + ret = venc_if_init(ctx, q_data->fmt->fourcc); + if (ret) { + mtk_v4l2_err("venc_if_init failed=%d, codec type=%x", + ret, q_data->fmt->fourcc); + return -EBUSY; + } + ctx->state = MTK_STATE_INIT; + } + + return 0; +} + +static int vidioc_venc_s_fmt_out(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); + struct vb2_queue *vq; + struct mtk_q_data *q_data; + int ret, i; + struct mtk_video_fmt *fmt; + unsigned int pitch_w_div16; + struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp; + + vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type); + if (!vq) { + mtk_v4l2_err("fail to get vq"); + return -EINVAL; + } + + if (vb2_is_busy(vq)) { + mtk_v4l2_err("queue busy"); + return -EBUSY; + } + + q_data = mtk_venc_get_q_data(ctx, f->type); + if (!q_data) { + mtk_v4l2_err("fail to get q data"); + return -EINVAL; + } + + fmt = mtk_venc_find_format(f); + if (!fmt) { + f->fmt.pix.pixelformat = mtk_video_formats[OUT_FMT_IDX].fourcc; + fmt = mtk_venc_find_format(f); + } + + pix_fmt_mp->height = clamp(pix_fmt_mp->height, + MTK_VENC_MIN_H, + MTK_VENC_MAX_H); + pix_fmt_mp->width = clamp(pix_fmt_mp->width, + MTK_VENC_MIN_W, + MTK_VENC_MAX_W); + + q_data->visible_width = f->fmt.pix_mp.width; + q_data->visible_height = f->fmt.pix_mp.height; + q_data->fmt = fmt; + ret = vidioc_try_fmt(f, q_data->fmt); + if (ret) + return ret; + + q_data->coded_width = f->fmt.pix_mp.width; + q_data->coded_height = f->fmt.pix_mp.height; + + pitch_w_div16 = DIV_ROUND_UP(q_data->visible_width, 16); + if (pitch_w_div16 % 8 != 0) { + /* Adjust returned width/height, so application could correctly + * allocate hw required memory + */ + q_data->visible_height += 32; + vidioc_try_fmt(f, q_data->fmt); + } + + q_data->field = f->fmt.pix_mp.field; + ctx->colorspace = f->fmt.pix_mp.colorspace; + ctx->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc; + ctx->quantization = f->fmt.pix_mp.quantization; + ctx->xfer_func = f->fmt.pix_mp.xfer_func; + + for (i = 0; i < f->fmt.pix_mp.num_planes; i++) { + struct v4l2_plane_pix_format *plane_fmt; + + plane_fmt = &f->fmt.pix_mp.plane_fmt[i]; + q_data->bytesperline[i] = plane_fmt->bytesperline; + 
q_data->sizeimage[i] = plane_fmt->sizeimage; + } + + return 0; +} + +static int vidioc_venc_g_fmt(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp; + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); + struct vb2_queue *vq; + struct mtk_q_data *q_data; + int i; + + vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type); + if (!vq) + return -EINVAL; + + q_data = mtk_venc_get_q_data(ctx, f->type); + + pix->width = q_data->coded_width; + pix->height = q_data->coded_height; + pix->pixelformat = q_data->fmt->fourcc; + pix->field = q_data->field; + pix->num_planes = q_data->fmt->num_planes; + for (i = 0; i < pix->num_planes; i++) { + pix->plane_fmt[i].bytesperline = q_data->bytesperline[i]; + pix->plane_fmt[i].sizeimage = q_data->sizeimage[i]; + memset(&(pix->plane_fmt[i].reserved[0]), 0x0, + sizeof(pix->plane_fmt[i].reserved)); + } + + pix->flags = 0; + pix->colorspace = ctx->colorspace; + pix->ycbcr_enc = ctx->ycbcr_enc; + pix->quantization = ctx->quantization; + pix->xfer_func = ctx->xfer_func; + + return 0; +} + +static int vidioc_try_fmt_vid_cap_mplane(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct mtk_video_fmt *fmt; + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); + + fmt = mtk_venc_find_format(f); + if (!fmt) { + f->fmt.pix.pixelformat = mtk_video_formats[CAP_FMT_IDX].fourcc; + fmt = mtk_venc_find_format(f); + } + f->fmt.pix_mp.colorspace = ctx->colorspace; + f->fmt.pix_mp.ycbcr_enc = ctx->ycbcr_enc; + f->fmt.pix_mp.quantization = ctx->quantization; + f->fmt.pix_mp.xfer_func = ctx->xfer_func; + + return vidioc_try_fmt(f, fmt); +} + +static int vidioc_try_fmt_vid_out_mplane(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct mtk_video_fmt *fmt; + + fmt = mtk_venc_find_format(f); + if (!fmt) { + f->fmt.pix.pixelformat = mtk_video_formats[OUT_FMT_IDX].fourcc; + fmt = mtk_venc_find_format(f); + } + if (!f->fmt.pix_mp.colorspace) { + f->fmt.pix_mp.colorspace = V4L2_COLORSPACE_REC709; + f->fmt.pix_mp.ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT; + f->fmt.pix_mp.quantization = V4L2_QUANTIZATION_DEFAULT; + f->fmt.pix_mp.xfer_func = V4L2_XFER_FUNC_DEFAULT; + } + + return vidioc_try_fmt(f, fmt); +} + +static int vidioc_venc_qbuf(struct file *file, void *priv, + struct v4l2_buffer *buf) +{ + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); + + if (ctx->state == MTK_STATE_ABORT) { + mtk_v4l2_err("[%d] Call on QBUF after unrecoverable error", + ctx->id); + return -EIO; + } + + return v4l2_m2m_qbuf(file, ctx->m2m_ctx, buf); +} + +static int vidioc_venc_dqbuf(struct file *file, void *priv, + struct v4l2_buffer *buf) +{ + struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv); + + if (ctx->state == MTK_STATE_ABORT) { + mtk_v4l2_err("[%d] Call on QBUF after unrecoverable error", + ctx->id); + return -EIO; + } + + return v4l2_m2m_dqbuf(file, ctx->m2m_ctx, buf); +} + +const struct v4l2_ioctl_ops mtk_venc_ioctl_ops = { + .vidioc_streamon = v4l2_m2m_ioctl_streamon, + .vidioc_streamoff = v4l2_m2m_ioctl_streamoff, + + .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs, + .vidioc_querybuf = v4l2_m2m_ioctl_querybuf, + .vidioc_qbuf = vidioc_venc_qbuf, + .vidioc_dqbuf = vidioc_venc_dqbuf, + + .vidioc_querycap = vidioc_venc_querycap, + .vidioc_enum_fmt_vid_cap_mplane = vidioc_enum_fmt_vid_cap_mplane, + .vidioc_enum_fmt_vid_out_mplane = vidioc_enum_fmt_vid_out_mplane, + .vidioc_enum_framesizes = vidioc_enum_framesizes, + + .vidioc_try_fmt_vid_cap_mplane = vidioc_try_fmt_vid_cap_mplane, + .vidioc_try_fmt_vid_out_mplane = vidioc_try_fmt_vid_out_mplane, + 
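+ /* DMABUF export and control-event plumbing come straight from the + * v4l2 core helpers */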
.vidioc_expbuf = v4l2_m2m_ioctl_expbuf, + .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, + .vidioc_unsubscribe_event = v4l2_event_unsubscribe, + + .vidioc_s_parm = vidioc_venc_s_parm, + .vidioc_g_parm = vidioc_venc_g_parm, + .vidioc_s_fmt_vid_cap_mplane = vidioc_venc_s_fmt_cap, + .vidioc_s_fmt_vid_out_mplane = vidioc_venc_s_fmt_out, + + .vidioc_g_fmt_vid_cap_mplane = vidioc_venc_g_fmt, + .vidioc_g_fmt_vid_out_mplane = vidioc_venc_g_fmt, + + .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs, + .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf, +}; + +static int vb2ops_venc_queue_setup(struct vb2_queue *vq, + unsigned int *nbuffers, + unsigned int *nplanes, + unsigned int sizes[], + struct device *alloc_devs[]) +{ + struct mtk_vcodec_ctx *ctx = vb2_get_drv_priv(vq); + struct mtk_q_data *q_data; + unsigned int i; + + q_data = mtk_venc_get_q_data(ctx, vq->type); + + if (q_data == NULL) + return -EINVAL; + + if (*nplanes) { + for (i = 0; i < *nplanes; i++) + if (sizes[i] < q_data->sizeimage[i]) + return -EINVAL; + } else { + *nplanes = q_data->fmt->num_planes; + for (i = 0; i < *nplanes; i++) + sizes[i] = q_data->sizeimage[i]; + } + + return 0; +} + +static int vb2ops_venc_buf_prepare(struct vb2_buffer *vb) +{ + struct mtk_vcodec_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + struct mtk_q_data *q_data; + int i; + + q_data = mtk_venc_get_q_data(ctx, vb->vb2_queue->type); + + for (i = 0; i < q_data->fmt->num_planes; i++) { + if (vb2_plane_size(vb, i) < q_data->sizeimage[i]) { + mtk_v4l2_err("data will not fit into plane %d (%lu < %d)", + i, vb2_plane_size(vb, i), + q_data->sizeimage[i]); + return -EINVAL; + } + } + + return 0; +} + +static void vb2ops_venc_buf_queue(struct vb2_buffer *vb) +{ + struct mtk_vcodec_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + struct vb2_v4l2_buffer *vb2_v4l2 = + container_of(vb, struct vb2_v4l2_buffer, vb2_buf); + + struct mtk_video_enc_buf *mtk_buf = + container_of(vb2_v4l2, struct mtk_video_enc_buf, vb); + + if ((vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) && + (ctx->param_change != MTK_ENCODE_PARAM_NONE)) { + mtk_v4l2_debug(1, "[%d] Before id=%d encode parameter change %x", + ctx->id, + mtk_buf->vb.vb2_buf.index, + ctx->param_change); + mtk_buf->param_change = ctx->param_change; + mtk_buf->enc_params = ctx->enc_params; + ctx->param_change = MTK_ENCODE_PARAM_NONE; + } + + v4l2_m2m_buf_queue(ctx->m2m_ctx, to_vb2_v4l2_buffer(vb)); +} + +static int vb2ops_venc_start_streaming(struct vb2_queue *q, unsigned int count) +{ + struct mtk_vcodec_ctx *ctx = vb2_get_drv_priv(q); + struct venc_enc_param param; + int ret; + int i; + + /* Once state turn into MTK_STATE_ABORT, we need stop_streaming + * to clear it + */ + if ((ctx->state == MTK_STATE_ABORT) || (ctx->state == MTK_STATE_FREE)) { + ret = -EIO; + goto err_set_param; + } + + /* Do the initialization when both start_streaming have been called */ + if (V4L2_TYPE_IS_OUTPUT(q->type)) { + if (!vb2_start_streaming_called(&ctx->m2m_ctx->cap_q_ctx.q)) + return 0; + } else { + if (!vb2_start_streaming_called(&ctx->m2m_ctx->out_q_ctx.q)) + return 0; + } + + mtk_venc_set_param(ctx, ¶m); + ret = venc_if_set_param(ctx, VENC_SET_PARAM_ENC, ¶m); + if (ret) { + mtk_v4l2_err("venc_if_set_param failed=%d", ret); + ctx->state = MTK_STATE_ABORT; + goto err_set_param; + } + ctx->param_change = MTK_ENCODE_PARAM_NONE; + + if ((ctx->q_data[MTK_Q_DATA_DST].fmt->fourcc == V4L2_PIX_FMT_H264) && + (ctx->enc_params.seq_hdr_mode != + V4L2_MPEG_VIDEO_HEADER_MODE_SEPARATE)) { + ret = venc_if_set_param(ctx, + 
VENC_SET_PARAM_PREPEND_HEADER, + NULL); + if (ret) { + mtk_v4l2_err("venc_if_set_param failed=%d", ret); + ctx->state = MTK_STATE_ABORT; + goto err_set_param; + } + ctx->state = MTK_STATE_HEADER; + } + + return 0; + +err_set_param: + for (i = 0; i < q->num_buffers; ++i) { + if (q->bufs[i]->state == VB2_BUF_STATE_ACTIVE) { + mtk_v4l2_debug(0, "[%d] id=%d, type=%d, %d -> VB2_BUF_STATE_QUEUED", + ctx->id, i, q->type, + (int)q->bufs[i]->state); + v4l2_m2m_buf_done(to_vb2_v4l2_buffer(q->bufs[i]), + VB2_BUF_STATE_QUEUED); + } + } + + return ret; +} + +static void vb2ops_venc_stop_streaming(struct vb2_queue *q) +{ + struct mtk_vcodec_ctx *ctx = vb2_get_drv_priv(q); + struct vb2_buffer *src_buf, *dst_buf; + int ret; + + mtk_v4l2_debug(2, "[%d]-> type=%d", ctx->id, q->type); + + if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) { + while ((dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx))) { + dst_buf->planes[0].bytesused = 0; + v4l2_m2m_buf_done(to_vb2_v4l2_buffer(dst_buf), + VB2_BUF_STATE_ERROR); + } + } else { + while ((src_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx))) + v4l2_m2m_buf_done(to_vb2_v4l2_buffer(src_buf), + VB2_BUF_STATE_ERROR); + } + + if ((q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE && + vb2_is_streaming(&ctx->m2m_ctx->out_q_ctx.q)) || + (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && + vb2_is_streaming(&ctx->m2m_ctx->cap_q_ctx.q))) { + mtk_v4l2_debug(1, "[%d]-> q type %d out=%d cap=%d", + ctx->id, q->type, + vb2_is_streaming(&ctx->m2m_ctx->out_q_ctx.q), + vb2_is_streaming(&ctx->m2m_ctx->cap_q_ctx.q)); + return; + } + + /* Release the encoder if both streams are stopped. */ + ret = venc_if_deinit(ctx); + if (ret) + mtk_v4l2_err("venc_if_deinit failed=%d", ret); + + ctx->state = MTK_STATE_FREE; +} + +static struct vb2_ops mtk_venc_vb2_ops = { + .queue_setup = vb2ops_venc_queue_setup, + .buf_prepare = vb2ops_venc_buf_prepare, + .buf_queue = vb2ops_venc_buf_queue, + .wait_prepare = vb2_ops_wait_prepare, + .wait_finish = vb2_ops_wait_finish, + .start_streaming = vb2ops_venc_start_streaming, + .stop_streaming = vb2ops_venc_stop_streaming, +}; + +static int mtk_venc_encode_header(void *priv) +{ + struct mtk_vcodec_ctx *ctx = priv; + int ret; + struct vb2_buffer *dst_buf; + struct mtk_vcodec_mem bs_buf; + struct venc_done_result enc_result; + + dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx); + if (!dst_buf) { + mtk_v4l2_debug(1, "No dst buffer"); + return -EINVAL; + } + + bs_buf.va = vb2_plane_vaddr(dst_buf, 0); + bs_buf.dma_addr = vb2_dma_contig_plane_dma_addr(dst_buf, 0); + bs_buf.size = (size_t)dst_buf->planes[0].length; + + mtk_v4l2_debug(1, + "[%d] buf id=%d va=0x%p dma_addr=0x%llx size=%zu", + ctx->id, + dst_buf->index, bs_buf.va, + (u64)bs_buf.dma_addr, + bs_buf.size); + + ret = venc_if_encode(ctx, + VENC_START_OPT_ENCODE_SEQUENCE_HEADER, + NULL, &bs_buf, &enc_result); + + if (ret) { + dst_buf->planes[0].bytesused = 0; + ctx->state = MTK_STATE_ABORT; + v4l2_m2m_buf_done(to_vb2_v4l2_buffer(dst_buf), + VB2_BUF_STATE_ERROR); + mtk_v4l2_err("venc_if_encode failed=%d", ret); + return -EINVAL; + } + + ctx->state = MTK_STATE_HEADER; + dst_buf->planes[0].bytesused = enc_result.bs_size; + v4l2_m2m_buf_done(to_vb2_v4l2_buffer(dst_buf), VB2_BUF_STATE_DONE); + + return 0; +} + +static int mtk_venc_param_change(struct mtk_vcodec_ctx *ctx) +{ + struct venc_enc_param enc_prm; + struct vb2_buffer *vb = v4l2_m2m_next_src_buf(ctx->m2m_ctx); + struct vb2_v4l2_buffer *vb2_v4l2 = + container_of(vb, struct vb2_v4l2_buffer, vb2_buf); + struct mtk_video_enc_buf *mtk_buf = + container_of(vb2_v4l2, 
struct mtk_video_enc_buf, vb); + + int ret = 0; + + memset(&enc_prm, 0, sizeof(enc_prm)); + if (mtk_buf->param_change == MTK_ENCODE_PARAM_NONE) + return 0; + + if (mtk_buf->param_change & MTK_ENCODE_PARAM_BITRATE) { + enc_prm.bitrate = mtk_buf->enc_params.bitrate; + mtk_v4l2_debug(1, "[%d] id=%d, change param br=%d", + ctx->id, + mtk_buf->vb.vb2_buf.index, + enc_prm.bitrate); + ret |= venc_if_set_param(ctx, + VENC_SET_PARAM_ADJUST_BITRATE, + &enc_prm); + } + if (!ret && mtk_buf->param_change & MTK_ENCODE_PARAM_FRAMERATE) { + enc_prm.frm_rate = mtk_buf->enc_params.framerate_num / + mtk_buf->enc_params.framerate_denom; + mtk_v4l2_debug(1, "[%d] id=%d, change param fr=%d", + ctx->id, + mtk_buf->vb.vb2_buf.index, + enc_prm.frm_rate); + ret |= venc_if_set_param(ctx, + VENC_SET_PARAM_ADJUST_FRAMERATE, + &enc_prm); + } + if (!ret && mtk_buf->param_change & MTK_ENCODE_PARAM_GOP_SIZE) { + enc_prm.gop_size = mtk_buf->enc_params.gop_size; + mtk_v4l2_debug(1, "change param intra period=%d", + enc_prm.gop_size); + ret |= venc_if_set_param(ctx, + VENC_SET_PARAM_GOP_SIZE, + &enc_prm); + } + if (!ret && mtk_buf->param_change & MTK_ENCODE_PARAM_FORCE_INTRA) { + mtk_v4l2_debug(1, "[%d] id=%d, change param force I=%d", + ctx->id, + mtk_buf->vb.vb2_buf.index, + mtk_buf->enc_params.force_intra); + if (mtk_buf->enc_params.force_intra) + ret |= venc_if_set_param(ctx, + VENC_SET_PARAM_FORCE_INTRA, + NULL); + } + + mtk_buf->param_change = MTK_ENCODE_PARAM_NONE; + + if (ret) { + ctx->state = MTK_STATE_ABORT; + mtk_v4l2_err("venc_if_set_param %d failed=%d", + mtk_buf->param_change, ret); + return -1; + } + + return 0; +} + +/* + * v4l2_m2m_streamoff() holds dev_mutex and waits mtk_venc_worker() + * to call v4l2_m2m_job_finish(). + * If mtk_venc_worker() tries to acquire dev_mutex, it will deadlock. + * So this function must not try to acquire dev->dev_mutex. + * This means v4l2 ioctls and mtk_venc_worker() can run at the same time. + * mtk_venc_worker() should be carefully implemented to avoid bugs. 
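+ * For example: userspace issues VIDIOC_STREAMOFF, which takes dev_mutex and + * then waits inside v4l2_m2m_streamoff() for the running job to finish; if + * mtk_venc_worker() now tried mutex_lock(&dev->dev_mutex) before its + * v4l2_m2m_job_finish() call, each thread would wait on the other forever.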
+ */ +static void mtk_venc_worker(struct work_struct *work) +{ + struct mtk_vcodec_ctx *ctx = container_of(work, struct mtk_vcodec_ctx, + encode_work); + struct vb2_buffer *src_buf, *dst_buf; + struct venc_frm_buf frm_buf; + struct mtk_vcodec_mem bs_buf; + struct venc_done_result enc_result; + int ret, i; + struct vb2_v4l2_buffer *vb2_v4l2; + + /* check dst_buf; dst_buf may be removed in device_run + * to store the encode header, so we need to check dst_buf and + * call job_finish here to prevent recursion + */ + dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx); + if (!dst_buf) { + v4l2_m2m_job_finish(ctx->dev->m2m_dev_enc, ctx->m2m_ctx); + return; + } + + src_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx); + memset(&frm_buf, 0, sizeof(frm_buf)); + for (i = 0; i < src_buf->num_planes; i++) { + frm_buf.fb_addr[i].va = vb2_plane_vaddr(src_buf, i); + frm_buf.fb_addr[i].dma_addr = + vb2_dma_contig_plane_dma_addr(src_buf, i); + frm_buf.fb_addr[i].size = + (size_t)src_buf->planes[i].length; + } + bs_buf.va = vb2_plane_vaddr(dst_buf, 0); + bs_buf.dma_addr = vb2_dma_contig_plane_dma_addr(dst_buf, 0); + bs_buf.size = (size_t)dst_buf->planes[0].length; + + mtk_v4l2_debug(2, + "Framebuf VA=%p PA=%llx Size=0x%zx;VA=%p PA=0x%llx Size=0x%zx;VA=%p PA=0x%llx Size=%zu", + frm_buf.fb_addr[0].va, + (u64)frm_buf.fb_addr[0].dma_addr, + frm_buf.fb_addr[0].size, + frm_buf.fb_addr[1].va, + (u64)frm_buf.fb_addr[1].dma_addr, + frm_buf.fb_addr[1].size, + frm_buf.fb_addr[2].va, + (u64)frm_buf.fb_addr[2].dma_addr, + frm_buf.fb_addr[2].size); + + ret = venc_if_encode(ctx, VENC_START_OPT_ENCODE_FRAME, + &frm_buf, &bs_buf, &enc_result); + + vb2_v4l2 = container_of(dst_buf, struct vb2_v4l2_buffer, vb2_buf); + if (enc_result.is_key_frm) + vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME; + + if (ret) { + v4l2_m2m_buf_done(to_vb2_v4l2_buffer(src_buf), + VB2_BUF_STATE_ERROR); + dst_buf->planes[0].bytesused = 0; + v4l2_m2m_buf_done(to_vb2_v4l2_buffer(dst_buf), + VB2_BUF_STATE_ERROR); + mtk_v4l2_err("venc_if_encode failed=%d", ret); + } else { + v4l2_m2m_buf_done(to_vb2_v4l2_buffer(src_buf), + VB2_BUF_STATE_DONE); + dst_buf->planes[0].bytesused = enc_result.bs_size; + v4l2_m2m_buf_done(to_vb2_v4l2_buffer(dst_buf), + VB2_BUF_STATE_DONE); + mtk_v4l2_debug(2, "venc_if_encode bs size=%d", + enc_result.bs_size); + } + + v4l2_m2m_job_finish(ctx->dev->m2m_dev_enc, ctx->m2m_ctx); + + mtk_v4l2_debug(1, "<=== src_buf[%d] dst_buf[%d] venc_if_encode ret=%d Size=%u===>", + src_buf->index, dst_buf->index, ret, + enc_result.bs_size); +} + +static void m2mops_venc_device_run(void *priv) +{ + struct mtk_vcodec_ctx *ctx = priv; + + if ((ctx->q_data[MTK_Q_DATA_DST].fmt->fourcc == V4L2_PIX_FMT_H264) && + (ctx->state != MTK_STATE_HEADER)) { + /* encode h264 sps/pps header */ + mtk_venc_encode_header(ctx); + queue_work(ctx->dev->encode_workqueue, &ctx->encode_work); + return; + } + + mtk_venc_param_change(ctx); + queue_work(ctx->dev->encode_workqueue, &ctx->encode_work); +} + +static int m2mops_venc_job_ready(void *m2m_priv) +{ + struct mtk_vcodec_ctx *ctx = m2m_priv; + + if (ctx->state == MTK_STATE_ABORT || ctx->state == MTK_STATE_FREE) { + mtk_v4l2_debug(3, "[%d] Not ready: state=0x%x.", + ctx->id, ctx->state); + return 0; + } + + return 1; +} + +static void m2mops_venc_job_abort(void *priv) +{ + struct mtk_vcodec_ctx *ctx = priv; + + ctx->state = MTK_STATE_ABORT; +} + +static void m2mops_venc_lock(void *m2m_priv) +{ + struct mtk_vcodec_ctx *ctx = m2m_priv; + + mutex_lock(&ctx->dev->dev_mutex); +} + +static void m2mops_venc_unlock(void *m2m_priv) +{ + struct 
mtk_vcodec_ctx *ctx = m2m_priv; + + mutex_unlock(&ctx->dev->dev_mutex); +} + +const struct v4l2_m2m_ops mtk_venc_m2m_ops = { + .device_run = m2mops_venc_device_run, + .job_ready = m2mops_venc_job_ready, + .job_abort = m2mops_venc_job_abort, + .lock = m2mops_venc_lock, + .unlock = m2mops_venc_unlock, +}; + +void mtk_vcodec_enc_set_default_params(struct mtk_vcodec_ctx *ctx) +{ + struct mtk_q_data *q_data; + + ctx->m2m_ctx->q_lock = &ctx->dev->dev_mutex; + ctx->fh.m2m_ctx = ctx->m2m_ctx; + ctx->fh.ctrl_handler = &ctx->ctrl_hdl; + INIT_WORK(&ctx->encode_work, mtk_venc_worker); + + ctx->colorspace = V4L2_COLORSPACE_REC709; + ctx->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT; + ctx->quantization = V4L2_QUANTIZATION_DEFAULT; + ctx->xfer_func = V4L2_XFER_FUNC_DEFAULT; + + q_data = &ctx->q_data[MTK_Q_DATA_SRC]; + memset(q_data, 0, sizeof(struct mtk_q_data)); + q_data->visible_width = DFT_CFG_WIDTH; + q_data->visible_height = DFT_CFG_HEIGHT; + q_data->coded_width = DFT_CFG_WIDTH; + q_data->coded_height = DFT_CFG_HEIGHT; + q_data->field = V4L2_FIELD_NONE; + + q_data->fmt = &mtk_video_formats[OUT_FMT_IDX]; + + v4l_bound_align_image(&q_data->coded_width, + MTK_VENC_MIN_W, + MTK_VENC_MAX_W, 4, + &q_data->coded_height, + MTK_VENC_MIN_H, + MTK_VENC_MAX_H, 5, 6); + + if (q_data->coded_width < DFT_CFG_WIDTH && + (q_data->coded_width + 16) <= MTK_VENC_MAX_W) + q_data->coded_width += 16; + if (q_data->coded_height < DFT_CFG_HEIGHT && + (q_data->coded_height + 32) <= MTK_VENC_MAX_H) + q_data->coded_height += 32; + + q_data->sizeimage[0] = + q_data->coded_width * q_data->coded_height+ + ((ALIGN(q_data->coded_width, 16) * 2) * 16); + q_data->bytesperline[0] = q_data->coded_width; + q_data->sizeimage[1] = + (q_data->coded_width * q_data->coded_height) / 2 + + (ALIGN(q_data->coded_width, 16) * 16); + q_data->bytesperline[1] = q_data->coded_width; + + q_data = &ctx->q_data[MTK_Q_DATA_DST]; + memset(q_data, 0, sizeof(struct mtk_q_data)); + q_data->coded_width = DFT_CFG_WIDTH; + q_data->coded_height = DFT_CFG_HEIGHT; + q_data->fmt = &mtk_video_formats[CAP_FMT_IDX]; + q_data->field = V4L2_FIELD_NONE; + ctx->q_data[MTK_Q_DATA_DST].sizeimage[0] = + DFT_CFG_WIDTH * DFT_CFG_HEIGHT; + ctx->q_data[MTK_Q_DATA_DST].bytesperline[0] = 0; + +} + +int mtk_vcodec_enc_ctrls_setup(struct mtk_vcodec_ctx *ctx) +{ + const struct v4l2_ctrl_ops *ops = &mtk_vcodec_enc_ctrl_ops; + struct v4l2_ctrl_handler *handler = &ctx->ctrl_hdl; + + v4l2_ctrl_handler_init(handler, MTK_MAX_CTRLS_HINT); + + v4l2_ctrl_new_std(handler, ops, V4L2_CID_MPEG_VIDEO_BITRATE, + 1, 4000000, 1, 4000000); + v4l2_ctrl_new_std(handler, ops, V4L2_CID_MPEG_VIDEO_B_FRAMES, + 0, 2, 1, 0); + v4l2_ctrl_new_std(handler, ops, V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE, + 0, 1, 1, 1); + v4l2_ctrl_new_std(handler, ops, V4L2_CID_MPEG_VIDEO_H264_MAX_QP, + 0, 51, 1, 51); + v4l2_ctrl_new_std(handler, ops, V4L2_CID_MPEG_VIDEO_H264_I_PERIOD, + 0, 65535, 1, 0); + v4l2_ctrl_new_std(handler, ops, V4L2_CID_MPEG_VIDEO_GOP_SIZE, + 0, 65535, 1, 0); + v4l2_ctrl_new_std(handler, ops, V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE, + 0, 1, 1, 0); + v4l2_ctrl_new_std(handler, ops, V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME, + 0, 0, 0, 0); + v4l2_ctrl_new_std_menu(handler, ops, + V4L2_CID_MPEG_VIDEO_HEADER_MODE, + V4L2_MPEG_VIDEO_HEADER_MODE_JOINED_WITH_1ST_FRAME, + 0, V4L2_MPEG_VIDEO_HEADER_MODE_SEPARATE); + v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE, + V4L2_MPEG_VIDEO_H264_PROFILE_HIGH, + 0, V4L2_MPEG_VIDEO_H264_PROFILE_MAIN); + v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_LEVEL, + 
V4L2_MPEG_VIDEO_H264_LEVEL_4_2, + 0, V4L2_MPEG_VIDEO_H264_LEVEL_4_0); + if (handler->error) { + mtk_v4l2_err("Init control handler fail %d", + handler->error); + return handler->error; + } + + v4l2_ctrl_handler_setup(&ctx->ctrl_hdl); + + return 0; +} + +int mtk_vcodec_enc_queue_init(void *priv, struct vb2_queue *src_vq, + struct vb2_queue *dst_vq) +{ + struct mtk_vcodec_ctx *ctx = priv; + int ret; + + /* Note: VB2_USERPTR works with dma-contig because mt8173 + * support iommu + * https://patchwork.kernel.org/patch/8335461/ + * https://patchwork.kernel.org/patch/7596181/ + */ + src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; + src_vq->io_modes = VB2_DMABUF | VB2_MMAP | VB2_USERPTR; + src_vq->drv_priv = ctx; + src_vq->buf_struct_size = sizeof(struct mtk_video_enc_buf); + src_vq->ops = &mtk_venc_vb2_ops; + src_vq->mem_ops = &vb2_dma_contig_memops; + src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + src_vq->lock = &ctx->dev->dev_mutex; + src_vq->dev = &ctx->dev->plat_dev->dev; + + ret = vb2_queue_init(src_vq); + if (ret) + return ret; + + dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; + dst_vq->io_modes = VB2_DMABUF | VB2_MMAP | VB2_USERPTR; + dst_vq->drv_priv = ctx; + dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer); + dst_vq->ops = &mtk_venc_vb2_ops; + dst_vq->mem_ops = &vb2_dma_contig_memops; + dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + dst_vq->lock = &ctx->dev->dev_mutex; + dst_vq->dev = &ctx->dev->plat_dev->dev; + + return vb2_queue_init(dst_vq); +} + +int mtk_venc_unlock(struct mtk_vcodec_ctx *ctx) +{ + struct mtk_vcodec_dev *dev = ctx->dev; + + mutex_unlock(&dev->enc_mutex); + return 0; +} + +int mtk_venc_lock(struct mtk_vcodec_ctx *ctx) +{ + struct mtk_vcodec_dev *dev = ctx->dev; + + mutex_lock(&dev->enc_mutex); + return 0; +} + +void mtk_vcodec_enc_release(struct mtk_vcodec_ctx *ctx) +{ + venc_if_deinit(ctx); +} diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.h new file mode 100644 index 000000000000..d7a154a97510 --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.h @@ -0,0 +1,58 @@ +/* +* Copyright (c) 2016 MediaTek Inc. +* Author: PC Chen +* Tiffany Lin +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License version 2 as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +*/ + +#ifndef _MTK_VCODEC_ENC_H_ +#define _MTK_VCODEC_ENC_H_ + +#include +#include + +#define MTK_VENC_IRQ_STATUS_SPS 0x1 +#define MTK_VENC_IRQ_STATUS_PPS 0x2 +#define MTK_VENC_IRQ_STATUS_FRM 0x4 +#define MTK_VENC_IRQ_STATUS_DRAM 0x8 +#define MTK_VENC_IRQ_STATUS_PAUSE 0x10 +#define MTK_VENC_IRQ_STATUS_SWITCH 0x20 + +#define MTK_VENC_IRQ_STATUS_OFFSET 0x05C +#define MTK_VENC_IRQ_ACK_OFFSET 0x060 + +/** + * struct mtk_video_enc_buf - Private data related to each VB2 buffer. + * @vb: Pointer to related VB2 buffer. 
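+ * (vb2ops_venc_buf_queue() snapshots the context's pending param_change + * and enc_params into this structure, and mtk_venc_param_change() applies + * that snapshot just before the buffer is encoded)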
+ * @list: list head for linking this buffer into the driver's buffer queue
+ * @param_change: Types of encode parameter changes to apply before encoding
+ *		  this buffer
+ * @enc_params: Encode parameters to apply before encoding this buffer
+ */
+struct mtk_video_enc_buf {
+	struct vb2_v4l2_buffer vb;
+	struct list_head list;
+	u32 param_change;
+	struct mtk_enc_params enc_params;
+};
+
+extern const struct v4l2_ioctl_ops mtk_venc_ioctl_ops;
+extern const struct v4l2_m2m_ops mtk_venc_m2m_ops;
+
+int mtk_venc_unlock(struct mtk_vcodec_ctx *ctx);
+int mtk_venc_lock(struct mtk_vcodec_ctx *ctx);
+int mtk_vcodec_enc_queue_init(void *priv, struct vb2_queue *src_vq,
+			      struct vb2_queue *dst_vq);
+void mtk_vcodec_enc_release(struct mtk_vcodec_ctx *ctx);
+int mtk_vcodec_enc_ctrls_setup(struct mtk_vcodec_ctx *ctx);
+void mtk_vcodec_enc_set_default_params(struct mtk_vcodec_ctx *ctx);
+
+#endif /* _MTK_VCODEC_ENC_H_ */
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
new file mode 100644
index 000000000000..e277b7c23516
--- /dev/null
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
@@ -0,0 +1,439 @@
+/*
+* Copyright (c) 2016 MediaTek Inc.
+* Author: PC Chen
+*	Tiffany Lin
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License version 2 as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "mtk_vcodec_drv.h"
+#include "mtk_vcodec_enc.h"
+#include "mtk_vcodec_enc_pm.h"
+#include "mtk_vcodec_intr.h"
+#include "mtk_vcodec_util.h"
+#include "mtk_vpu.h"
+
+module_param(mtk_v4l2_dbg_level, int, S_IRUGO | S_IWUSR);
+module_param(mtk_vcodec_dbg, bool, S_IRUGO | S_IWUSR);
+
+/* Wake up context wait_queue */
+static void wake_up_ctx(struct mtk_vcodec_ctx *ctx, unsigned int reason)
+{
+	ctx->int_cond = 1;
+	ctx->int_type = reason;
+	wake_up_interruptible(&ctx->queue);
+}
+
+static void clean_irq_status(unsigned int irq_status, void __iomem *addr)
+{
+	if (irq_status & MTK_VENC_IRQ_STATUS_PAUSE)
+		writel(MTK_VENC_IRQ_STATUS_PAUSE, addr);
+
+	if (irq_status & MTK_VENC_IRQ_STATUS_SWITCH)
+		writel(MTK_VENC_IRQ_STATUS_SWITCH, addr);
+
+	if (irq_status & MTK_VENC_IRQ_STATUS_DRAM)
+		writel(MTK_VENC_IRQ_STATUS_DRAM, addr);
+
+	if (irq_status & MTK_VENC_IRQ_STATUS_SPS)
+		writel(MTK_VENC_IRQ_STATUS_SPS, addr);
+
+	if (irq_status & MTK_VENC_IRQ_STATUS_PPS)
+		writel(MTK_VENC_IRQ_STATUS_PPS, addr);
+
+	if (irq_status & MTK_VENC_IRQ_STATUS_FRM)
+		writel(MTK_VENC_IRQ_STATUS_FRM, addr);
+
+}
+static irqreturn_t mtk_vcodec_enc_irq_handler(int irq, void *priv)
+{
+	struct mtk_vcodec_dev *dev = priv;
+	struct mtk_vcodec_ctx *ctx;
+	unsigned long flags;
+	void __iomem *addr;
+
+	spin_lock_irqsave(&dev->irqlock, flags);
+	ctx = dev->curr_ctx;
+	spin_unlock_irqrestore(&dev->irqlock, flags);
+
+	mtk_v4l2_debug(1, "id=%d", ctx->id);
+	addr = dev->reg_base[VENC_SYS] + MTK_VENC_IRQ_ACK_OFFSET;
+
+	ctx->irq_status = readl(dev->reg_base[VENC_SYS] +
+				(MTK_VENC_IRQ_STATUS_OFFSET));
+
+	clean_irq_status(ctx->irq_status, addr);
+
+	wake_up_ctx(ctx, MTK_INST_IRQ_RECEIVED);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mtk_vcodec_enc_lt_irq_handler(int irq, void *priv)
+{
+	struct mtk_vcodec_dev *dev = priv;
+	struct mtk_vcodec_ctx *ctx;
+	unsigned long flags;
+	void __iomem *addr;
+
+	spin_lock_irqsave(&dev->irqlock, flags);
+	ctx = dev->curr_ctx;
+	spin_unlock_irqrestore(&dev->irqlock, flags);
+
+	mtk_v4l2_debug(1, "id=%d", ctx->id);
+	ctx->irq_status = readl(dev->reg_base[VENC_LT_SYS] +
+				(MTK_VENC_IRQ_STATUS_OFFSET));
+
+	addr = dev->reg_base[VENC_LT_SYS] + MTK_VENC_IRQ_ACK_OFFSET;
+
+	clean_irq_status(ctx->irq_status, addr);
+
+	wake_up_ctx(ctx, MTK_INST_IRQ_RECEIVED);
+	return IRQ_HANDLED;
+}
+
+static void mtk_vcodec_enc_reset_handler(void *priv)
+{
+	struct mtk_vcodec_dev *dev = priv;
+	struct mtk_vcodec_ctx *ctx;
+
+	mtk_v4l2_debug(0, "Watchdog timeout!!");
+
+	mutex_lock(&dev->dev_mutex);
+	list_for_each_entry(ctx, &dev->ctx_list, list) {
+		ctx->state = MTK_STATE_ABORT;
+		mtk_v4l2_debug(0, "[%d] Change to state MTK_STATE_ABORT",
+			       ctx->id);
+	}
+	mutex_unlock(&dev->dev_mutex);
+}
+
+static int fops_vcodec_open(struct file *file)
+{
+	struct mtk_vcodec_dev *dev = video_drvdata(file);
+	struct mtk_vcodec_ctx *ctx = NULL;
+	int ret = 0;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	mutex_lock(&dev->dev_mutex);
+	/*
+	 * Use a simple counter to uniquely identify this context. Only
+	 * used for logging.
+	 */
+	ctx->id = dev->id_counter++;
+	v4l2_fh_init(&ctx->fh, video_devdata(file));
+	file->private_data = &ctx->fh;
+	v4l2_fh_add(&ctx->fh);
+	INIT_LIST_HEAD(&ctx->list);
+	ctx->dev = dev;
+	init_waitqueue_head(&ctx->queue);
+
+	ctx->type = MTK_INST_ENCODER;
+	ret = mtk_vcodec_enc_ctrls_setup(ctx);
+	if (ret) {
+		mtk_v4l2_err("Failed to set up controls (%d)",
+			     ret);
+		goto err_ctrls_setup;
+	}
+	ctx->m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev_enc, ctx,
+					 &mtk_vcodec_enc_queue_init);
+	if (IS_ERR((__force void *)ctx->m2m_ctx)) {
+		ret = PTR_ERR((__force void *)ctx->m2m_ctx);
+		mtk_v4l2_err("Failed to v4l2_m2m_ctx_init() (%d)",
+			     ret);
+		goto err_m2m_ctx_init;
+	}
+	mtk_vcodec_enc_set_default_params(ctx);
+
+	if (v4l2_fh_is_singular(&ctx->fh)) {
+		/*
+		 * vpu_load_firmware checks if it was loaded already and
+		 * does nothing in that case
+		 */
+		ret = vpu_load_firmware(dev->vpu_plat_dev);
+		if (ret < 0) {
+			/*
+			 * vpu_load_firmware() returns 0 on success;
+			 * a negative value means the download failed.
+			 */
+			mtk_v4l2_err("vpu_load_firmware failed!");
+			goto err_load_fw;
+		}
+
+		dev->enc_capability =
+			vpu_get_venc_hw_capa(dev->vpu_plat_dev);
+		mtk_v4l2_debug(0, "encoder capability %x", dev->enc_capability);
+	}
+
+	mtk_v4l2_debug(2, "Create instance [%d]@%p m2m_ctx=%p ",
+		       ctx->id, ctx, ctx->m2m_ctx);
+
+	dev->num_instances++;
+	list_add(&ctx->list, &dev->ctx_list);
+
+	mutex_unlock(&dev->dev_mutex);
+	mtk_v4l2_debug(0, "%s encoder [%d]", dev_name(&dev->plat_dev->dev),
+		       ctx->id);
+	return ret;
+
+	/* Deinit when a failure occurred */
+err_load_fw:
+	v4l2_m2m_ctx_release(ctx->m2m_ctx);
+err_m2m_ctx_init:
+	v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
+err_ctrls_setup:
+	v4l2_fh_del(&ctx->fh);
+	v4l2_fh_exit(&ctx->fh);
+	kfree(ctx);
+	mutex_unlock(&dev->dev_mutex);
+
+	return ret;
+}
+
+static int fops_vcodec_release(struct file *file)
+{
+	struct mtk_vcodec_dev *dev = video_drvdata(file);
+	struct mtk_vcodec_ctx *ctx = fh_to_ctx(file->private_data);
+
+	mtk_v4l2_debug(1, "[%d] encoder", ctx->id);
+	mutex_lock(&dev->dev_mutex);
+
+	mtk_vcodec_enc_release(ctx);
+	v4l2_fh_del(&ctx->fh);
+	v4l2_fh_exit(&ctx->fh);
+	v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
+	v4l2_m2m_ctx_release(ctx->m2m_ctx);
+
+	list_del_init(&ctx->list);
+	dev->num_instances--;
+	kfree(ctx);
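+	/*
+	 * dev_mutex has covered the whole teardown above, so a concurrent
+	 * open() can never observe a half-removed context on ctx_list.
+	 */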
+	mutex_unlock(&dev->dev_mutex);
+	return 0;
+}
+
+static const struct v4l2_file_operations mtk_vcodec_fops = {
+	.owner = THIS_MODULE,
+	.open = fops_vcodec_open,
+	.release = fops_vcodec_release,
+	.poll = v4l2_m2m_fop_poll,
+	.unlocked_ioctl = video_ioctl2,
+	.mmap = v4l2_m2m_fop_mmap,
+};
+
+static int mtk_vcodec_probe(struct platform_device *pdev)
+{
+	struct mtk_vcodec_dev *dev;
+	struct video_device *vfd_enc;
+	struct resource *res;
+	int i, j, ret;
+	DEFINE_DMA_ATTRS(attrs);
+
+	dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&dev->ctx_list);
+	dev->plat_dev = pdev;
+
+	dev->vpu_plat_dev = vpu_get_plat_device(dev->plat_dev);
+	if (dev->vpu_plat_dev == NULL) {
+		mtk_v4l2_err("[VPU] vpu device is not ready");
+		return -EPROBE_DEFER;
+	}
+
+	vpu_wdt_reg_handler(dev->vpu_plat_dev, mtk_vcodec_enc_reset_handler,
+			    dev, VPU_RST_ENC);
+
+	ret = mtk_vcodec_init_enc_pm(dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to get mtk vcodec clock source!");
+		return ret;
+	}
+
+	for (i = VENC_SYS, j = 0; i < NUM_MAX_VCODEC_REG_BASE; i++, j++) {
+		res = platform_get_resource(pdev, IORESOURCE_MEM, j);
+		if (res == NULL) {
+			dev_err(&pdev->dev, "get memory resource failed.");
+			ret = -ENXIO;
+			goto err_res;
+		}
+		dev->reg_base[i] = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR((__force void *)dev->reg_base[i])) {
+			ret = PTR_ERR((__force void *)dev->reg_base[i]);
+			goto err_res;
+		}
+		mtk_v4l2_debug(2, "reg[%d] base=0x%p", i, dev->reg_base[i]);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (res == NULL) {
+		dev_err(&pdev->dev, "failed to get irq resource");
+		ret = -ENOENT;
+		goto err_res;
+	}
+
+	dev->enc_irq = platform_get_irq(pdev, 0);
+	ret = devm_request_irq(&pdev->dev, dev->enc_irq,
+			       mtk_vcodec_enc_irq_handler,
+			       0, pdev->name, dev);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to install dev->enc_irq %d (%d)",
+			dev->enc_irq,
+			ret);
+		ret = -EINVAL;
+		goto err_res;
+	}
+
+	dev->enc_lt_irq = platform_get_irq(pdev, 1);
+	ret = devm_request_irq(&pdev->dev,
+			       dev->enc_lt_irq, mtk_vcodec_enc_lt_irq_handler,
+			       0, pdev->name, dev);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Failed to install dev->enc_lt_irq %d (%d)",
+			dev->enc_lt_irq, ret);
+		ret = -EINVAL;
+		goto err_res;
+	}
+
+	disable_irq(dev->enc_irq);
+	disable_irq(dev->enc_lt_irq); /* VENC_LT */
+	mutex_init(&dev->enc_mutex);
+	mutex_init(&dev->dev_mutex);
+	spin_lock_init(&dev->irqlock);
+
+	snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
+		 "[MTK_V4L2_VENC]");
+
+	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
+	if (ret) {
+		mtk_v4l2_err("v4l2_device_register err=%d", ret);
+		goto err_res;
+	}
+
+	init_waitqueue_head(&dev->queue);
+
+	/* allocate video device for encoder and register it */
+	vfd_enc = video_device_alloc();
+	if (!vfd_enc) {
+		mtk_v4l2_err("Failed to allocate video device");
+		ret = -ENOMEM;
+		goto err_enc_alloc;
+	}
+	vfd_enc->fops = &mtk_vcodec_fops;
+	vfd_enc->ioctl_ops = &mtk_venc_ioctl_ops;
+	vfd_enc->release = video_device_release;
+	vfd_enc->lock = &dev->dev_mutex;
+	vfd_enc->v4l2_dev = &dev->v4l2_dev;
+	vfd_enc->vfl_dir = VFL_DIR_M2M;
+	vfd_enc->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE |
+			       V4L2_CAP_STREAMING;
+
+	snprintf(vfd_enc->name, sizeof(vfd_enc->name), "%s",
+		 MTK_VCODEC_ENC_NAME);
+	video_set_drvdata(vfd_enc, dev);
+	dev->vfd_enc = vfd_enc;
+	platform_set_drvdata(pdev, dev);
+
+	dev->m2m_dev_enc = v4l2_m2m_init(&mtk_venc_m2m_ops);
+	if (IS_ERR((__force void *)dev->m2m_dev_enc)) {
+		mtk_v4l2_err("Failed to init mem2mem enc device");
+		ret = PTR_ERR((__force void *)dev->m2m_dev_enc);
+		goto err_enc_mem_init;
+	}
+
+	dev->encode_workqueue =
+		alloc_ordered_workqueue(MTK_VCODEC_ENC_NAME,
+					WQ_MEM_RECLAIM |
+					WQ_FREEZABLE);
+	if (!dev->encode_workqueue) {
+		mtk_v4l2_err("Failed to create encode workqueue");
+		ret = -EINVAL;
+		goto err_event_workq;
+	}
+
+	ret = video_register_device(vfd_enc, VFL_TYPE_GRABBER, 1);
+	if (ret) {
+		mtk_v4l2_err("Failed to register video device");
+		goto err_enc_reg;
+	}
+
+	/* Keep the iommu from allocating big chunks of memory */
+	dma_set_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, &attrs);
+
+	mtk_v4l2_debug(0, "encoder registered as /dev/video%d",
+		       vfd_enc->num);
+
+	return 0;
+
+err_enc_reg:
+	destroy_workqueue(dev->encode_workqueue);
+err_event_workq:
+	v4l2_m2m_release(dev->m2m_dev_enc);
+err_enc_mem_init:
+	video_unregister_device(vfd_enc);
+err_enc_alloc:
+	v4l2_device_unregister(&dev->v4l2_dev);
+err_res:
+	mtk_vcodec_release_enc_pm(dev);
+	return ret;
+}
+
+static const struct of_device_id mtk_vcodec_enc_match[] = {
+	{.compatible = "mediatek,mt8173-vcodec-enc",},
+	{},
+};
+MODULE_DEVICE_TABLE(of, mtk_vcodec_enc_match);
+
+static int mtk_vcodec_enc_remove(struct platform_device *pdev)
+{
+	struct mtk_vcodec_dev *dev = platform_get_drvdata(pdev);
+
+	mtk_v4l2_debug_enter();
+	flush_workqueue(dev->encode_workqueue);
+	destroy_workqueue(dev->encode_workqueue);
+	if (dev->m2m_dev_enc)
+		v4l2_m2m_release(dev->m2m_dev_enc);
+
+	if (dev->vfd_enc)
+		video_unregister_device(dev->vfd_enc);
+
+	v4l2_device_unregister(&dev->v4l2_dev);
+	mtk_vcodec_release_enc_pm(dev);
+	return 0;
+}
+
+static struct platform_driver mtk_vcodec_enc_driver = {
+	.probe	= mtk_vcodec_probe,
+	.remove	= mtk_vcodec_enc_remove,
+	.driver	= {
+		.name	= MTK_VCODEC_ENC_NAME,
+		.of_match_table = mtk_vcodec_enc_match,
+	},
+};
+
+module_platform_driver(mtk_vcodec_enc_driver);
+
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Mediatek video codec V4L2 encoder driver");
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.c
new file mode 100644
index 000000000000..3e73e9db781f
--- /dev/null
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.c
@@ -0,0 +1,137 @@
+/*
+* Copyright (c) 2016 MediaTek Inc.
+* Author: Tiffany Lin
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License version 2 as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*/ + +#include +#include +#include +#include +#include + +#include "mtk_vcodec_enc_pm.h" +#include "mtk_vcodec_util.h" +#include "mtk_vpu.h" + + +int mtk_vcodec_init_enc_pm(struct mtk_vcodec_dev *mtkdev) +{ + struct device_node *node; + struct platform_device *pdev; + struct device *dev; + struct mtk_vcodec_pm *pm; + int ret = 0; + + pdev = mtkdev->plat_dev; + pm = &mtkdev->pm; + memset(pm, 0, sizeof(struct mtk_vcodec_pm)); + pm->mtkdev = mtkdev; + pm->dev = &pdev->dev; + dev = &pdev->dev; + + node = of_parse_phandle(dev->of_node, "mediatek,larb", 0); + if (!node) { + mtk_v4l2_err("no mediatek,larb found"); + return -1; + } + pdev = of_find_device_by_node(node); + if (!pdev) { + mtk_v4l2_err("no mediatek,larb device found"); + return -1; + } + pm->larbvenc = &pdev->dev; + + node = of_parse_phandle(dev->of_node, "mediatek,larb", 1); + if (!node) { + mtk_v4l2_err("no mediatek,larb found"); + return -1; + } + + pdev = of_find_device_by_node(node); + if (!pdev) { + mtk_v4l2_err("no mediatek,larb device found"); + return -1; + } + + pm->larbvenclt = &pdev->dev; + pdev = mtkdev->plat_dev; + pm->dev = &pdev->dev; + + pm->vencpll_d2 = devm_clk_get(&pdev->dev, "venc_sel_src"); + if (IS_ERR(pm->vencpll_d2)) { + mtk_v4l2_err("devm_clk_get vencpll_d2 fail"); + ret = PTR_ERR(pm->vencpll_d2); + } + + pm->venc_sel = devm_clk_get(&pdev->dev, "venc_sel"); + if (IS_ERR(pm->venc_sel)) { + mtk_v4l2_err("devm_clk_get venc_sel fail"); + ret = PTR_ERR(pm->venc_sel); + } + + pm->univpll1_d2 = devm_clk_get(&pdev->dev, "venc_lt_sel_src"); + if (IS_ERR(pm->univpll1_d2)) { + mtk_v4l2_err("devm_clk_get univpll1_d2 fail"); + ret = PTR_ERR(pm->univpll1_d2); + } + + pm->venc_lt_sel = devm_clk_get(&pdev->dev, "venc_lt_sel"); + if (IS_ERR(pm->venc_lt_sel)) { + mtk_v4l2_err("devm_clk_get venc_lt_sel fail"); + ret = PTR_ERR(pm->venc_lt_sel); + } + + return ret; +} + +void mtk_vcodec_release_enc_pm(struct mtk_vcodec_dev *mtkdev) +{ +} + + +void mtk_vcodec_enc_clock_on(struct mtk_vcodec_pm *pm) +{ + int ret; + + ret = clk_prepare_enable(pm->venc_sel); + if (ret) + mtk_v4l2_err("clk_prepare_enable fail %d", ret); + + ret = clk_set_parent(pm->venc_sel, pm->vencpll_d2); + if (ret) + mtk_v4l2_err("clk_set_parent fail %d", ret); + + ret = clk_prepare_enable(pm->venc_lt_sel); + if (ret) + mtk_v4l2_err("clk_prepare_enable fail %d", ret); + + ret = clk_set_parent(pm->venc_lt_sel, pm->univpll1_d2); + if (ret) + mtk_v4l2_err("clk_set_parent fail %d", ret); + + ret = mtk_smi_larb_get(pm->larbvenc); + if (ret) + mtk_v4l2_err("mtk_smi_larb_get larb3 fail %d", ret); + + ret = mtk_smi_larb_get(pm->larbvenclt); + if (ret) + mtk_v4l2_err("mtk_smi_larb_get larb4 fail %d", ret); + +} + +void mtk_vcodec_enc_clock_off(struct mtk_vcodec_pm *pm) +{ + mtk_smi_larb_put(pm->larbvenc); + mtk_smi_larb_put(pm->larbvenclt); + clk_disable_unprepare(pm->venc_lt_sel); + clk_disable_unprepare(pm->venc_sel); +} diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.h new file mode 100644 index 000000000000..f32167138976 --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.h @@ -0,0 +1,26 @@ +/* +* Copyright (c) 2016 MediaTek Inc. +* Author: Tiffany Lin +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License version 2 as +* published by the Free Software Foundation. 
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*/
+
+#ifndef _MTK_VCODEC_ENC_PM_H_
+#define _MTK_VCODEC_ENC_PM_H_
+
+#include "mtk_vcodec_drv.h"
+
+int mtk_vcodec_init_enc_pm(struct mtk_vcodec_dev *dev);
+void mtk_vcodec_release_enc_pm(struct mtk_vcodec_dev *dev);
+
+void mtk_vcodec_enc_clock_on(struct mtk_vcodec_pm *pm);
+void mtk_vcodec_enc_clock_off(struct mtk_vcodec_pm *pm);
+
+#endif /* _MTK_VCODEC_ENC_PM_H_ */
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.c
new file mode 100644
index 000000000000..52e7e5c9afa0
--- /dev/null
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.c
@@ -0,0 +1,54 @@
+/*
+* Copyright (c) 2016 MediaTek Inc.
+* Author: Tiffany Lin
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License version 2 as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*/
+
+#include
+#include
+
+#include "mtk_vcodec_drv.h"
+#include "mtk_vcodec_intr.h"
+#include "mtk_vcodec_util.h"
+
+int mtk_vcodec_wait_for_done_ctx(struct mtk_vcodec_ctx *ctx, int command,
+				 unsigned int timeout_ms)
+{
+	wait_queue_head_t *waitqueue;
+	long timeout_jiff, ret;
+	int status = 0;
+
+	waitqueue = (wait_queue_head_t *)&ctx->queue;
+	timeout_jiff = msecs_to_jiffies(timeout_ms);
+
+	ret = wait_event_interruptible_timeout(*waitqueue,
+					       (ctx->int_cond &&
+						(ctx->int_type == command)),
+					       timeout_jiff);
+
+	if (!ret) {
+		status = -1;	/* timeout */
+		mtk_v4l2_err("[%d] cmd=%d, ctx->type=%d, wait_event_interruptible_timeout time=%ums out %d %d!",
+			     ctx->id, command, ctx->type, timeout_ms,
+			     ctx->int_cond, ctx->int_type);
+	} else if (-ERESTARTSYS == ret) {
+		mtk_v4l2_err("[%d] cmd=%d, ctx->type=%d, wait_event_interruptible_timeout interrupted by a signal %d %d",
+			     ctx->id, command, ctx->type, ctx->int_cond,
+			     ctx->int_type);
+		status = -1;
+	}
+
+	ctx->int_cond = 0;
+	ctx->int_type = 0;
+
+	return status;
+}
+EXPORT_SYMBOL(mtk_vcodec_wait_for_done_ctx);
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h
new file mode 100644
index 000000000000..33e890f5aa9c
--- /dev/null
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h
@@ -0,0 +1,27 @@
+/*
+* Copyright (c) 2016 MediaTek Inc.
+* Author: Tiffany Lin
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License version 2 as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*/ + +#ifndef _MTK_VCODEC_INTR_H_ +#define _MTK_VCODEC_INTR_H_ + +#define MTK_INST_IRQ_RECEIVED 0x1 +#define MTK_INST_WORK_THREAD_ABORT_DONE 0x2 + +struct mtk_vcodec_ctx; + +/* timeout is ms */ +int mtk_vcodec_wait_for_done_ctx(struct mtk_vcodec_ctx *data, int command, + unsigned int timeout_ms); + +#endif /* _MTK_VCODEC_INTR_H_ */ diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_util.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_util.c new file mode 100644 index 000000000000..5e3651372a3c --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_util.c @@ -0,0 +1,94 @@ +/* +* Copyright (c) 2016 MediaTek Inc. +* Author: PC Chen +* Tiffany Lin +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License version 2 as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +*/ + +#include + +#include "mtk_vcodec_drv.h" +#include "mtk_vcodec_util.h" +#include "mtk_vpu.h" + +/* For encoder, this will enable logs in venc/*/ +bool mtk_vcodec_dbg; +EXPORT_SYMBOL(mtk_vcodec_dbg); + +/* The log level of v4l2 encoder or decoder driver. + * That is, files under mtk-vcodec/. + */ +int mtk_v4l2_dbg_level; +EXPORT_SYMBOL(mtk_v4l2_dbg_level); + +void __iomem *mtk_vcodec_get_reg_addr(struct mtk_vcodec_ctx *data, + unsigned int reg_idx) +{ + struct mtk_vcodec_ctx *ctx = (struct mtk_vcodec_ctx *)data; + + if (!data || reg_idx >= NUM_MAX_VCODEC_REG_BASE) { + mtk_v4l2_err("Invalid arguments, reg_idx=%d", reg_idx); + return NULL; + } + return ctx->dev->reg_base[reg_idx]; +} +EXPORT_SYMBOL(mtk_vcodec_get_reg_addr); + +int mtk_vcodec_mem_alloc(struct mtk_vcodec_ctx *data, + struct mtk_vcodec_mem *mem) +{ + unsigned long size = mem->size; + struct mtk_vcodec_ctx *ctx = (struct mtk_vcodec_ctx *)data; + struct device *dev = &ctx->dev->plat_dev->dev; + + mem->va = dma_alloc_coherent(dev, size, &mem->dma_addr, GFP_KERNEL); + + if (!mem->va) { + mtk_v4l2_err("%s dma_alloc size=%ld failed!", dev_name(dev), + size); + return -ENOMEM; + } + + memset(mem->va, 0, size); + + mtk_v4l2_debug(3, "[%d] - va = %p", ctx->id, mem->va); + mtk_v4l2_debug(3, "[%d] - dma = 0x%lx", ctx->id, + (unsigned long)mem->dma_addr); + mtk_v4l2_debug(3, "[%d] size = 0x%lx", ctx->id, size); + + return 0; +} +EXPORT_SYMBOL(mtk_vcodec_mem_alloc); + +void mtk_vcodec_mem_free(struct mtk_vcodec_ctx *data, + struct mtk_vcodec_mem *mem) +{ + unsigned long size = mem->size; + struct mtk_vcodec_ctx *ctx = (struct mtk_vcodec_ctx *)data; + struct device *dev = &ctx->dev->plat_dev->dev; + + if (!mem->va) { + mtk_v4l2_err("%s dma_free size=%ld failed!", dev_name(dev), + size); + return; + } + + dma_free_coherent(dev, size, mem->va, mem->dma_addr); + mem->va = NULL; + mem->dma_addr = 0; + mem->size = 0; + + mtk_v4l2_debug(3, "[%d] - va = %p", ctx->id, mem->va); + mtk_v4l2_debug(3, "[%d] - dma = 0x%lx", ctx->id, + (unsigned long)mem->dma_addr); + mtk_v4l2_debug(3, "[%d] size = 0x%lx", ctx->id, size); +} +EXPORT_SYMBOL(mtk_vcodec_mem_free); diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_util.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_util.h new file mode 100644 index 000000000000..d6345fc04840 --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_util.h @@ -0,0 +1,87 @@ +/* +* Copyright (c) 2016 MediaTek Inc. 
+* Author: PC Chen +* Tiffany Lin +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License version 2 as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +*/ + +#ifndef _MTK_VCODEC_UTIL_H_ +#define _MTK_VCODEC_UTIL_H_ + +#include +#include + +struct mtk_vcodec_mem { + size_t size; + void *va; + dma_addr_t dma_addr; +}; + +struct mtk_vcodec_ctx; + +extern int mtk_v4l2_dbg_level; +extern bool mtk_vcodec_dbg; + +#define DEBUG 1 + +#if defined(DEBUG) + +#define mtk_v4l2_debug(level, fmt, args...) \ + do { \ + if (mtk_v4l2_dbg_level >= level) \ + pr_info("[MTK_V4L2] level=%d %s(),%d: " fmt "\n",\ + level, __func__, __LINE__, ##args); \ + } while (0) + +#define mtk_v4l2_err(fmt, args...) \ + pr_err("[MTK_V4L2][ERROR] %s:%d: " fmt "\n", __func__, __LINE__, \ + ##args) + + +#define mtk_v4l2_debug_enter() mtk_v4l2_debug(3, "+") +#define mtk_v4l2_debug_leave() mtk_v4l2_debug(3, "-") + +#define mtk_vcodec_debug(h, fmt, args...) \ + do { \ + if (mtk_vcodec_dbg) \ + pr_info("[MTK_VCODEC][%d]: %s() " fmt "\n", \ + ((struct mtk_vcodec_ctx *)h->ctx)->id, \ + __func__, ##args); \ + } while (0) + +#define mtk_vcodec_err(h, fmt, args...) \ + pr_err("[MTK_VCODEC][ERROR][%d]: %s() " fmt "\n", \ + ((struct mtk_vcodec_ctx *)h->ctx)->id, __func__, ##args) + +#define mtk_vcodec_debug_enter(h) mtk_vcodec_debug(h, "+") +#define mtk_vcodec_debug_leave(h) mtk_vcodec_debug(h, "-") + +#else + +#define mtk_v4l2_debug(level, fmt, args...) +#define mtk_v4l2_err(fmt, args...) +#define mtk_v4l2_debug_enter() +#define mtk_v4l2_debug_leave() + +#define mtk_vcodec_debug(h, fmt, args...) +#define mtk_vcodec_err(h, fmt, args...) +#define mtk_vcodec_debug_enter(h) +#define mtk_vcodec_debug_leave(h) + +#endif + +void __iomem *mtk_vcodec_get_reg_addr(struct mtk_vcodec_ctx *data, + unsigned int reg_idx); +int mtk_vcodec_mem_alloc(struct mtk_vcodec_ctx *data, + struct mtk_vcodec_mem *mem); +void mtk_vcodec_mem_free(struct mtk_vcodec_ctx *data, + struct mtk_vcodec_mem *mem); +#endif /* _MTK_VCODEC_UTIL_H_ */ diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c new file mode 100644 index 000000000000..9a600525b3c1 --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c @@ -0,0 +1,679 @@ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: Jungchang Tsao + * Daniel Hsiao + * PoChun Lin + * + * This program is free software; you can redistribute it and/or + * modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include + +#include "../mtk_vcodec_drv.h" +#include "../mtk_vcodec_util.h" +#include "../mtk_vcodec_intr.h" +#include "../mtk_vcodec_enc.h" +#include "../mtk_vcodec_enc_pm.h" +#include "../venc_drv_base.h" +#include "../venc_ipi_msg.h" +#include "../venc_vpu_if.h" +#include "mtk_vpu.h" + +static const char h264_filler_marker[] = {0x0, 0x0, 0x0, 0x1, 0xc}; + +#define H264_FILLER_MARKER_SIZE ARRAY_SIZE(h264_filler_marker) +#define VENC_PIC_BITSTREAM_BYTE_CNT 0x0098 + +/** + * enum venc_h264_vpu_work_buf - h264 encoder buffer index + */ +enum venc_h264_vpu_work_buf { + VENC_H264_VPU_WORK_BUF_RC_INFO, + VENC_H264_VPU_WORK_BUF_RC_CODE, + VENC_H264_VPU_WORK_BUF_REC_LUMA, + VENC_H264_VPU_WORK_BUF_REC_CHROMA, + VENC_H264_VPU_WORK_BUF_REF_LUMA, + VENC_H264_VPU_WORK_BUF_REF_CHROMA, + VENC_H264_VPU_WORK_BUF_MV_INFO_1, + VENC_H264_VPU_WORK_BUF_MV_INFO_2, + VENC_H264_VPU_WORK_BUF_SKIP_FRAME, + VENC_H264_VPU_WORK_BUF_MAX, +}; + +/** + * enum venc_h264_bs_mode - for bs_mode argument in h264_enc_vpu_encode + */ +enum venc_h264_bs_mode { + H264_BS_MODE_SPS, + H264_BS_MODE_PPS, + H264_BS_MODE_FRAME, +}; + +/* + * struct venc_h264_vpu_config - Structure for h264 encoder configuration + * @input_fourcc: input fourcc + * @bitrate: target bitrate (in bps) + * @pic_w: picture width. Picture size is visible stream resolution, in pixels, + * to be used for display purposes; must be smaller or equal to buffer + * size. + * @pic_h: picture height + * @buf_w: buffer width. Buffer size is stream resolution in pixels aligned to + * hardware requirements. + * @buf_h: buffer height + * @gop_size: group of picture size (idr frame) + * @intra_period: intra frame period + * @framerate: frame rate in fps + * @profile: as specified in standard + * @level: as specified in standard + * @wfd: WFD mode 1:on, 0:off + */ +struct venc_h264_vpu_config { + u32 input_fourcc; + u32 bitrate; + u32 pic_w; + u32 pic_h; + u32 buf_w; + u32 buf_h; + u32 gop_size; + u32 intra_period; + u32 framerate; + u32 profile; + u32 level; + u32 wfd; +}; + +/* + * struct venc_h264_vpu_buf - Structure for buffer information + * @align: buffer alignment (in bytes) + * @iova: IO virtual address + * @vpua: VPU side memory addr which is used by RC_CODE + * @size: buffer size (in bytes) + */ +struct venc_h264_vpu_buf { + u32 align; + u32 iova; + u32 vpua; + u32 size; +}; + +/* + * struct venc_h264_vsi - Structure for VPU driver control and info share + * This structure is allocated in VPU side and shared to AP side. + * @config: h264 encoder configuration + * @work_bufs: working buffer information in VPU side + * The work_bufs here is for storing the 'size' info shared to AP side. + * The similar item in struct venc_h264_inst is for memory allocation + * in AP side. The AP driver will copy the 'size' from here to the one in + * struct mtk_vcodec_mem, then invoke mtk_vcodec_mem_alloc to allocate + * the buffer. After that, bypass the 'dma_addr' to the 'iova' field here for + * register setting in VPU side. 
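+ *
+ * A minimal sketch of that handshake, matching
+ * h264_enc_alloc_work_buf() below:
+ *
+ *	inst->work_bufs[i].size = vsi->work_bufs[i].size;
+ *	mtk_vcodec_mem_alloc(inst->ctx, &inst->work_bufs[i]);
+ *	vsi->work_bufs[i].iova = inst->work_bufs[i].dma_addr;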
+ */ +struct venc_h264_vsi { + struct venc_h264_vpu_config config; + struct venc_h264_vpu_buf work_bufs[VENC_H264_VPU_WORK_BUF_MAX]; +}; + +/* + * struct venc_h264_inst - h264 encoder AP driver instance + * @hw_base: h264 encoder hardware register base + * @work_bufs: working buffer + * @pps_buf: buffer to store the pps bitstream + * @work_buf_allocated: working buffer allocated flag + * @frm_cnt: encoded frame count + * @prepend_hdr: when the v4l2 layer send VENC_SET_PARAM_PREPEND_HEADER cmd + * through h264_enc_set_param interface, it will set this flag and prepend the + * sps/pps in h264_enc_encode function. + * @vpu_inst: VPU instance to exchange information between AP and VPU + * @vsi: driver structure allocated by VPU side and shared to AP side for + * control and info share + * @ctx: context for v4l2 layer integration + */ +struct venc_h264_inst { + void __iomem *hw_base; + struct mtk_vcodec_mem work_bufs[VENC_H264_VPU_WORK_BUF_MAX]; + struct mtk_vcodec_mem pps_buf; + bool work_buf_allocated; + unsigned int frm_cnt; + unsigned int prepend_hdr; + struct venc_vpu_inst vpu_inst; + struct venc_h264_vsi *vsi; + struct mtk_vcodec_ctx *ctx; +}; + +static inline void h264_write_reg(struct venc_h264_inst *inst, u32 addr, + u32 val) +{ + writel(val, inst->hw_base + addr); +} + +static inline u32 h264_read_reg(struct venc_h264_inst *inst, u32 addr) +{ + return readl(inst->hw_base + addr); +} + +static unsigned int h264_get_profile(struct venc_h264_inst *inst, + unsigned int profile) +{ + switch (profile) { + case V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE: + return 66; + case V4L2_MPEG_VIDEO_H264_PROFILE_MAIN: + return 77; + case V4L2_MPEG_VIDEO_H264_PROFILE_HIGH: + return 100; + case V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE: + mtk_vcodec_err(inst, "unsupported CONSTRAINED_BASELINE"); + return 0; + case V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED: + mtk_vcodec_err(inst, "unsupported EXTENDED"); + return 0; + default: + mtk_vcodec_debug(inst, "unsupported profile %d", profile); + return 100; + } +} + +static unsigned int h264_get_level(struct venc_h264_inst *inst, + unsigned int level) +{ + switch (level) { + case V4L2_MPEG_VIDEO_H264_LEVEL_1B: + mtk_vcodec_err(inst, "unsupported 1B"); + return 0; + case V4L2_MPEG_VIDEO_H264_LEVEL_1_0: + return 10; + case V4L2_MPEG_VIDEO_H264_LEVEL_1_1: + return 11; + case V4L2_MPEG_VIDEO_H264_LEVEL_1_2: + return 12; + case V4L2_MPEG_VIDEO_H264_LEVEL_1_3: + return 13; + case V4L2_MPEG_VIDEO_H264_LEVEL_2_0: + return 20; + case V4L2_MPEG_VIDEO_H264_LEVEL_2_1: + return 21; + case V4L2_MPEG_VIDEO_H264_LEVEL_2_2: + return 22; + case V4L2_MPEG_VIDEO_H264_LEVEL_3_0: + return 30; + case V4L2_MPEG_VIDEO_H264_LEVEL_3_1: + return 31; + case V4L2_MPEG_VIDEO_H264_LEVEL_3_2: + return 32; + case V4L2_MPEG_VIDEO_H264_LEVEL_4_0: + return 40; + case V4L2_MPEG_VIDEO_H264_LEVEL_4_1: + return 41; + default: + mtk_vcodec_debug(inst, "unsupported level %d", level); + return 31; + } +} + +static void h264_enc_free_work_buf(struct venc_h264_inst *inst) +{ + int i; + + mtk_vcodec_debug_enter(inst); + + /* Except the SKIP_FRAME buffers, + * other buffers need to be freed by AP. 
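+	 * (The SKIP_FRAME buffer lives in VPU-side memory and was only
+	 * address-mapped, never allocated, by the AP during
+	 * h264_enc_alloc_work_buf(), so there is nothing to free for it.)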
+	 */
+	for (i = 0; i < VENC_H264_VPU_WORK_BUF_MAX; i++) {
+		if (i != VENC_H264_VPU_WORK_BUF_SKIP_FRAME)
+			mtk_vcodec_mem_free(inst->ctx, &inst->work_bufs[i]);
+	}
+
+	mtk_vcodec_mem_free(inst->ctx, &inst->pps_buf);
+
+	mtk_vcodec_debug_leave(inst);
+}
+
+static int h264_enc_alloc_work_buf(struct venc_h264_inst *inst)
+{
+	int i;
+	int ret = 0;
+	struct venc_h264_vpu_buf *wb = inst->vsi->work_bufs;
+
+	mtk_vcodec_debug_enter(inst);
+
+	for (i = 0; i < VENC_H264_VPU_WORK_BUF_MAX; i++) {
+		/*
+		 * This 'wb' structure is set by the VPU side and shared to
+		 * the AP for buffer allocation and IO virtual addr mapping.
+		 * For most of the buffers, the AP will allocate the buffer
+		 * according to the 'size' field and store the IO virtual
+		 * addr in the 'iova' field. There are two exceptions:
+		 * (1) RC_CODE buffer: it's pre-allocated on the VPU side,
+		 * which saves its VPU addr in the 'vpua' field. The AP will
+		 * translate the VPU addr to the corresponding IO virtual
+		 * addr and store it in the 'iova' field for register setting
+		 * on the VPU side.
+		 * (2) SKIP_FRAME buffer: it's pre-allocated on the VPU side,
+		 * which saves its VPU addr in the 'vpua' field. The AP will
+		 * translate the VPU addr to the corresponding AP side
+		 * virtual address and use memcpy to move the data into the
+		 * bitstream buffer assigned by the v4l2 layer.
+		 */
+		inst->work_bufs[i].size = wb[i].size;
+		if (i == VENC_H264_VPU_WORK_BUF_SKIP_FRAME) {
+			inst->work_bufs[i].va = vpu_mapping_dm_addr(
+				inst->vpu_inst.dev, wb[i].vpua);
+			inst->work_bufs[i].dma_addr = 0;
+		} else {
+			ret = mtk_vcodec_mem_alloc(inst->ctx,
+						   &inst->work_bufs[i]);
+			if (ret) {
+				mtk_vcodec_err(inst,
+					       "cannot allocate buf %d", i);
+				goto err_alloc;
+			}
+			/*
+			 * This RC_CODE is pre-allocated by the VPU and saved
+			 * at a VPU addr. So we need to use memcpy to copy
+			 * RC_CODE from the VPU addr into the IO virtual addr
+			 * in the 'iova' field for register setting on the
+			 * VPU side.
+ */ + if (i == VENC_H264_VPU_WORK_BUF_RC_CODE) { + void *tmp_va; + + tmp_va = vpu_mapping_dm_addr(inst->vpu_inst.dev, + wb[i].vpua); + memcpy(inst->work_bufs[i].va, tmp_va, + wb[i].size); + } + } + wb[i].iova = inst->work_bufs[i].dma_addr; + + mtk_vcodec_debug(inst, + "work_buf[%d] va=0x%p iova=%pad size=%zu", + i, inst->work_bufs[i].va, + &inst->work_bufs[i].dma_addr, + inst->work_bufs[i].size); + } + + /* the pps_buf is used by AP side only */ + inst->pps_buf.size = 128; + ret = mtk_vcodec_mem_alloc(inst->ctx, &inst->pps_buf); + if (ret) { + mtk_vcodec_err(inst, "cannot allocate pps_buf"); + goto err_alloc; + } + + mtk_vcodec_debug_leave(inst); + + return ret; + +err_alloc: + h264_enc_free_work_buf(inst); + + return ret; +} + +static unsigned int h264_enc_wait_venc_done(struct venc_h264_inst *inst) +{ + unsigned int irq_status = 0; + struct mtk_vcodec_ctx *ctx = (struct mtk_vcodec_ctx *)inst->ctx; + + if (!mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS)) { + irq_status = ctx->irq_status; + mtk_vcodec_debug(inst, "irq_status %x <-", irq_status); + } + return irq_status; +} + +static int h264_encode_sps(struct venc_h264_inst *inst, + struct mtk_vcodec_mem *bs_buf, + unsigned int *bs_size) +{ + int ret = 0; + unsigned int irq_status; + + mtk_vcodec_debug_enter(inst); + + ret = vpu_enc_encode(&inst->vpu_inst, H264_BS_MODE_SPS, NULL, + bs_buf, bs_size); + if (ret) + return ret; + + irq_status = h264_enc_wait_venc_done(inst); + if (irq_status != MTK_VENC_IRQ_STATUS_SPS) { + mtk_vcodec_err(inst, "expect irq status %d", + MTK_VENC_IRQ_STATUS_SPS); + return -EINVAL; + } + + *bs_size = h264_read_reg(inst, VENC_PIC_BITSTREAM_BYTE_CNT); + mtk_vcodec_debug(inst, "bs size %d <-", *bs_size); + + return ret; +} + +static int h264_encode_pps(struct venc_h264_inst *inst, + struct mtk_vcodec_mem *bs_buf, + unsigned int *bs_size) +{ + int ret = 0; + unsigned int irq_status; + + mtk_vcodec_debug_enter(inst); + + ret = vpu_enc_encode(&inst->vpu_inst, H264_BS_MODE_PPS, NULL, + bs_buf, bs_size); + if (ret) + return ret; + + irq_status = h264_enc_wait_venc_done(inst); + if (irq_status != MTK_VENC_IRQ_STATUS_PPS) { + mtk_vcodec_err(inst, "expect irq status %d", + MTK_VENC_IRQ_STATUS_PPS); + return -EINVAL; + } + + *bs_size = h264_read_reg(inst, VENC_PIC_BITSTREAM_BYTE_CNT); + mtk_vcodec_debug(inst, "bs size %d <-", *bs_size); + + return ret; +} + +static int h264_encode_header(struct venc_h264_inst *inst, + struct mtk_vcodec_mem *bs_buf, + unsigned int *bs_size) +{ + int ret = 0; + unsigned int bs_size_sps; + unsigned int bs_size_pps; + + ret = h264_encode_sps(inst, bs_buf, &bs_size_sps); + if (ret) + return ret; + + ret = h264_encode_pps(inst, &inst->pps_buf, &bs_size_pps); + if (ret) + return ret; + + memcpy(bs_buf->va + bs_size_sps, inst->pps_buf.va, bs_size_pps); + *bs_size = bs_size_sps + bs_size_pps; + + return ret; +} + +static int h264_encode_frame(struct venc_h264_inst *inst, + struct venc_frm_buf *frm_buf, + struct mtk_vcodec_mem *bs_buf, + unsigned int *bs_size) +{ + int ret = 0; + unsigned int irq_status; + + mtk_vcodec_debug_enter(inst); + + ret = vpu_enc_encode(&inst->vpu_inst, H264_BS_MODE_FRAME, frm_buf, + bs_buf, bs_size); + if (ret) + return ret; + + /* + * skip frame case: The skip frame buffer is composed by vpu side only, + * it does not trigger the hw, so skip the wait interrupt operation. 
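+	 * The VPU reports this case with VEN_IPI_MSG_ENC_STATE_SKIP and
+	 * leaves the ready-made bitstream in the SKIP_FRAME work buffer,
+	 * so the AP only has to copy it into the capture buffer.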
+ */ + if (inst->vpu_inst.state == VEN_IPI_MSG_ENC_STATE_SKIP) { + *bs_size = inst->vpu_inst.bs_size; + memcpy(bs_buf->va, + inst->work_bufs[VENC_H264_VPU_WORK_BUF_SKIP_FRAME].va, + *bs_size); + ++inst->frm_cnt; + return ret; + } + + irq_status = h264_enc_wait_venc_done(inst); + if (irq_status != MTK_VENC_IRQ_STATUS_FRM) { + mtk_vcodec_err(inst, "irq_status=%d failed", irq_status); + return -EIO; + } + + *bs_size = h264_read_reg(inst, VENC_PIC_BITSTREAM_BYTE_CNT); + + ++inst->frm_cnt; + mtk_vcodec_debug(inst, "frm %d bs_size %d key_frm %d <-", + inst->frm_cnt, *bs_size, inst->vpu_inst.is_key_frm); + + return ret; +} + +static void h264_encode_filler(struct venc_h264_inst *inst, void *buf, + int size) +{ + unsigned char *p = buf; + + if (size < H264_FILLER_MARKER_SIZE) { + mtk_vcodec_err(inst, "filler size too small %d", size); + return; + } + + memcpy(p, h264_filler_marker, ARRAY_SIZE(h264_filler_marker)); + size -= H264_FILLER_MARKER_SIZE; + p += H264_FILLER_MARKER_SIZE; + memset(p, 0xff, size); +} + +static int h264_enc_init(struct mtk_vcodec_ctx *ctx, unsigned long *handle) +{ + int ret = 0; + struct venc_h264_inst *inst; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + inst->vpu_inst.ctx = ctx; + inst->vpu_inst.dev = ctx->dev->vpu_plat_dev; + inst->vpu_inst.id = IPI_VENC_H264; + inst->hw_base = mtk_vcodec_get_reg_addr(inst->ctx, VENC_SYS); + + mtk_vcodec_debug_enter(inst); + + ret = vpu_enc_init(&inst->vpu_inst); + + inst->vsi = (struct venc_h264_vsi *)inst->vpu_inst.vsi; + + mtk_vcodec_debug_leave(inst); + + if (ret) + kfree(inst); + else + (*handle) = (unsigned long)inst; + + return ret; +} + +static int h264_enc_encode(unsigned long handle, + enum venc_start_opt opt, + struct venc_frm_buf *frm_buf, + struct mtk_vcodec_mem *bs_buf, + struct venc_done_result *result) +{ + int ret = 0; + struct venc_h264_inst *inst = (struct venc_h264_inst *)handle; + struct mtk_vcodec_ctx *ctx = inst->ctx; + + mtk_vcodec_debug(inst, "opt %d ->", opt); + + enable_irq(ctx->dev->enc_irq); + + switch (opt) { + case VENC_START_OPT_ENCODE_SEQUENCE_HEADER: { + unsigned int bs_size_hdr; + + ret = h264_encode_header(inst, bs_buf, &bs_size_hdr); + if (ret) + goto encode_err; + + result->bs_size = bs_size_hdr; + result->is_key_frm = false; + break; + } + + case VENC_START_OPT_ENCODE_FRAME: { + int hdr_sz; + int hdr_sz_ext; + int filler_sz = 0; + const int bs_alignment = 128; + struct mtk_vcodec_mem tmp_bs_buf; + unsigned int bs_size_hdr; + unsigned int bs_size_frm; + + if (!inst->prepend_hdr) { + ret = h264_encode_frame(inst, frm_buf, bs_buf, + &result->bs_size); + if (ret) + goto encode_err; + result->is_key_frm = inst->vpu_inst.is_key_frm; + break; + } + + mtk_vcodec_debug(inst, "h264_encode_frame prepend SPS/PPS"); + + ret = h264_encode_header(inst, bs_buf, &bs_size_hdr); + if (ret) + goto encode_err; + + hdr_sz = bs_size_hdr; + hdr_sz_ext = (hdr_sz & (bs_alignment - 1)); + if (hdr_sz_ext) { + filler_sz = bs_alignment - hdr_sz_ext; + if (hdr_sz_ext + H264_FILLER_MARKER_SIZE > bs_alignment) + filler_sz += bs_alignment; + h264_encode_filler(inst, bs_buf->va + hdr_sz, + filler_sz); + } + + tmp_bs_buf.va = bs_buf->va + hdr_sz + filler_sz; + tmp_bs_buf.dma_addr = bs_buf->dma_addr + hdr_sz + filler_sz; + tmp_bs_buf.size = bs_buf->size - (hdr_sz + filler_sz); + + ret = h264_encode_frame(inst, frm_buf, &tmp_bs_buf, + &bs_size_frm); + if (ret) + goto encode_err; + + result->bs_size = hdr_sz + filler_sz + bs_size_frm; + + mtk_vcodec_debug(inst, "hdr %d filler %d frame 
%d bs %d", + hdr_sz, filler_sz, bs_size_frm, + result->bs_size); + + inst->prepend_hdr = 0; + result->is_key_frm = inst->vpu_inst.is_key_frm; + break; + } + + default: + mtk_vcodec_err(inst, "venc_start_opt %d not supported", opt); + ret = -EINVAL; + break; + } + +encode_err: + + disable_irq(ctx->dev->enc_irq); + mtk_vcodec_debug(inst, "opt %d <-", opt); + + return ret; +} + +static int h264_enc_set_param(unsigned long handle, + enum venc_set_param_type type, + struct venc_enc_param *enc_prm) +{ + int ret = 0; + struct venc_h264_inst *inst = (struct venc_h264_inst *)handle; + + mtk_vcodec_debug(inst, "->type=%d", type); + + switch (type) { + case VENC_SET_PARAM_ENC: + inst->vsi->config.input_fourcc = enc_prm->input_yuv_fmt; + inst->vsi->config.bitrate = enc_prm->bitrate; + inst->vsi->config.pic_w = enc_prm->width; + inst->vsi->config.pic_h = enc_prm->height; + inst->vsi->config.buf_w = enc_prm->buf_width; + inst->vsi->config.buf_h = enc_prm->buf_height; + inst->vsi->config.gop_size = enc_prm->gop_size; + inst->vsi->config.framerate = enc_prm->frm_rate; + inst->vsi->config.intra_period = enc_prm->intra_period; + inst->vsi->config.profile = + h264_get_profile(inst, enc_prm->h264_profile); + inst->vsi->config.level = + h264_get_level(inst, enc_prm->h264_level); + inst->vsi->config.wfd = 0; + ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm); + if (ret) + break; + if (inst->work_buf_allocated) { + h264_enc_free_work_buf(inst); + inst->work_buf_allocated = false; + } + ret = h264_enc_alloc_work_buf(inst); + if (ret) + break; + inst->work_buf_allocated = true; + break; + + case VENC_SET_PARAM_PREPEND_HEADER: + inst->prepend_hdr = 1; + mtk_vcodec_debug(inst, "set prepend header mode"); + break; + + default: + ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm); + break; + } + + mtk_vcodec_debug_leave(inst); + + return ret; +} + +static int h264_enc_deinit(unsigned long handle) +{ + int ret = 0; + struct venc_h264_inst *inst = (struct venc_h264_inst *)handle; + + mtk_vcodec_debug_enter(inst); + + ret = vpu_enc_deinit(&inst->vpu_inst); + + if (inst->work_buf_allocated) + h264_enc_free_work_buf(inst); + + mtk_vcodec_debug_leave(inst); + kfree(inst); + + return ret; +} + +static struct venc_common_if venc_h264_if = { + h264_enc_init, + h264_enc_encode, + h264_enc_set_param, + h264_enc_deinit, +}; + +struct venc_common_if *get_h264_enc_comm_if(void); + +struct venc_common_if *get_h264_enc_comm_if(void) +{ + return &venc_h264_if; +} diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c new file mode 100644 index 000000000000..60bbcd2a0510 --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c @@ -0,0 +1,486 @@ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: Daniel Hsiao + * PoChun Lin + * + * This program is free software; you can redistribute it and/or + * modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include + +#include "../mtk_vcodec_drv.h" +#include "../mtk_vcodec_util.h" +#include "../mtk_vcodec_intr.h" +#include "../mtk_vcodec_enc.h" +#include "../mtk_vcodec_enc_pm.h" +#include "../venc_drv_base.h" +#include "../venc_ipi_msg.h" +#include "../venc_vpu_if.h" +#include "mtk_vpu.h" + +#define VENC_BITSTREAM_FRAME_SIZE 0x0098 +#define VENC_BITSTREAM_HEADER_LEN 0x00e8 + +/* This ac_tag is vp8 frame tag. */ +#define MAX_AC_TAG_SIZE 10 + +/** + * enum venc_vp8_vpu_work_buf - vp8 encoder buffer index + */ +enum venc_vp8_vpu_work_buf { + VENC_VP8_VPU_WORK_BUF_LUMA, + VENC_VP8_VPU_WORK_BUF_LUMA2, + VENC_VP8_VPU_WORK_BUF_LUMA3, + VENC_VP8_VPU_WORK_BUF_CHROMA, + VENC_VP8_VPU_WORK_BUF_CHROMA2, + VENC_VP8_VPU_WORK_BUF_CHROMA3, + VENC_VP8_VPU_WORK_BUF_MV_INFO, + VENC_VP8_VPU_WORK_BUF_BS_HEADER, + VENC_VP8_VPU_WORK_BUF_PROB_BUF, + VENC_VP8_VPU_WORK_BUF_RC_INFO, + VENC_VP8_VPU_WORK_BUF_RC_CODE, + VENC_VP8_VPU_WORK_BUF_RC_CODE2, + VENC_VP8_VPU_WORK_BUF_RC_CODE3, + VENC_VP8_VPU_WORK_BUF_MAX, +}; + +/* + * struct venc_vp8_vpu_config - Structure for vp8 encoder configuration + * @input_fourcc: input fourcc + * @bitrate: target bitrate (in bps) + * @pic_w: picture width. Picture size is visible stream resolution, in pixels, + * to be used for display purposes; must be smaller or equal to buffer + * size. + * @pic_h: picture height + * @buf_w: buffer width (with 16 alignment). Buffer size is stream resolution + * in pixels aligned to hardware requirements. + * @buf_h: buffer height (with 16 alignment) + * @gop_size: group of picture size (key frame) + * @framerate: frame rate in fps + * @ts_mode: temporal scalability mode (0: disable, 1: enable) + * support three temporal layers - 0: 7.5fps 1: 7.5fps 2: 15fps. + */ +struct venc_vp8_vpu_config { + u32 input_fourcc; + u32 bitrate; + u32 pic_w; + u32 pic_h; + u32 buf_w; + u32 buf_h; + u32 gop_size; + u32 framerate; + u32 ts_mode; +}; + +/* + * struct venc_vp8_vpu_buf -Structure for buffer information + * @align: buffer alignment (in bytes) + * @iova: IO virtual address + * @vpua: VPU side memory addr which is used by RC_CODE + * @size: buffer size (in bytes) + */ +struct venc_vp8_vpu_buf { + u32 align; + u32 iova; + u32 vpua; + u32 size; +}; + +/* + * struct venc_vp8_vsi - Structure for VPU driver control and info share + * This structure is allocated in VPU side and shared to AP side. + * @config: vp8 encoder configuration + * @work_bufs: working buffer information in VPU side + * The work_bufs here is for storing the 'size' info shared to AP side. + * The similar item in struct venc_vp8_inst is for memory allocation + * in AP side. The AP driver will copy the 'size' from here to the one in + * struct mtk_vcodec_mem, then invoke mtk_vcodec_mem_alloc to allocate + * the buffer. After that, bypass the 'dma_addr' to the 'iova' field here for + * register setting in VPU side. + */ +struct venc_vp8_vsi { + struct venc_vp8_vpu_config config; + struct venc_vp8_vpu_buf work_bufs[VENC_VP8_VPU_WORK_BUF_MAX]; +}; + +/* + * struct venc_vp8_inst - vp8 encoder AP driver instance + * @hw_base: vp8 encoder hardware register base + * @work_bufs: working buffer + * @work_buf_allocated: working buffer allocated flag + * @frm_cnt: encoded frame count, it's used for I-frame judgement and + * reset when force intra cmd received. + * @ts_mode: temporal scalability mode (0: disable, 1: enable) + * support three temporal layers - 0: 7.5fps 1: 7.5fps 2: 15fps. 
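+ *	(enabled via VENC_SET_PARAM_TS_MODE, which must be issued before
+ *	VENC_SET_PARAM_ENC; see vp8_enc_set_param() below)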
+ * @vpu_inst: VPU instance to exchange information between AP and VPU + * @vsi: driver structure allocated by VPU side and shared to AP side for + * control and info share + * @ctx: context for v4l2 layer integration + */ +struct venc_vp8_inst { + void __iomem *hw_base; + struct mtk_vcodec_mem work_bufs[VENC_VP8_VPU_WORK_BUF_MAX]; + bool work_buf_allocated; + unsigned int frm_cnt; + unsigned int ts_mode; + struct venc_vpu_inst vpu_inst; + struct venc_vp8_vsi *vsi; + struct mtk_vcodec_ctx *ctx; +}; + +static inline void vp8_enc_write_reg(struct venc_vp8_inst *inst, u32 addr, + u32 val) +{ + writel(val, inst->hw_base + addr); +} + +static inline u32 vp8_enc_read_reg(struct venc_vp8_inst *inst, u32 addr) +{ + return readl(inst->hw_base + addr); +} + +static void vp8_enc_free_work_buf(struct venc_vp8_inst *inst) +{ + int i; + + mtk_vcodec_debug_enter(inst); + + /* Buffers need to be freed by AP. */ + for (i = 0; i < VENC_VP8_VPU_WORK_BUF_MAX; i++) { + if ((inst->work_bufs[i].size == 0)) + continue; + mtk_vcodec_mem_free(inst->ctx, &inst->work_bufs[i]); + } + + mtk_vcodec_debug_leave(inst); +} + +static int vp8_enc_alloc_work_buf(struct venc_vp8_inst *inst) +{ + int i; + int ret = 0; + struct venc_vp8_vpu_buf *wb = inst->vsi->work_bufs; + + mtk_vcodec_debug_enter(inst); + + for (i = 0; i < VENC_VP8_VPU_WORK_BUF_MAX; i++) { + if ((wb[i].size == 0)) + continue; + /* + * This 'wb' structure is set by VPU side and shared to AP for + * buffer allocation and IO virtual addr mapping. For most of + * the buffers, AP will allocate the buffer according to 'size' + * field and store the IO virtual addr in 'iova' field. For the + * RC_CODEx buffers, they are pre-allocated in the VPU side + * because they are inside VPU SRAM, and save the VPU addr in + * the 'vpua' field. The AP will translate the VPU addr to the + * corresponding IO virtual addr and store in 'iova' field. + */ + inst->work_bufs[i].size = wb[i].size; + ret = mtk_vcodec_mem_alloc(inst->ctx, &inst->work_bufs[i]); + if (ret) { + mtk_vcodec_err(inst, + "cannot alloc work_bufs[%d]", i); + goto err_alloc; + } + /* + * This RC_CODEx is pre-allocated by VPU and saved in VPU addr. + * So we need use memcpy to copy RC_CODEx from VPU addr into IO + * virtual addr in 'iova' field for reg setting in VPU side. + */ + if (i == VENC_VP8_VPU_WORK_BUF_RC_CODE || + i == VENC_VP8_VPU_WORK_BUF_RC_CODE2 || + i == VENC_VP8_VPU_WORK_BUF_RC_CODE3) { + void *tmp_va; + + tmp_va = vpu_mapping_dm_addr(inst->vpu_inst.dev, + wb[i].vpua); + memcpy(inst->work_bufs[i].va, tmp_va, wb[i].size); + } + wb[i].iova = inst->work_bufs[i].dma_addr; + + mtk_vcodec_debug(inst, + "work_bufs[%d] va=0x%p,iova=%pad,size=%zu", + i, inst->work_bufs[i].va, + &inst->work_bufs[i].dma_addr, + inst->work_bufs[i].size); + } + + mtk_vcodec_debug_leave(inst); + + return ret; + +err_alloc: + vp8_enc_free_work_buf(inst); + + return ret; +} + +static unsigned int vp8_enc_wait_venc_done(struct venc_vp8_inst *inst) +{ + unsigned int irq_status = 0; + struct mtk_vcodec_ctx *ctx = (struct mtk_vcodec_ctx *)inst->ctx; + + if (!mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS)) { + irq_status = ctx->irq_status; + mtk_vcodec_debug(inst, "isr return %x", irq_status); + } + return irq_status; +} + +/* + * Compose ac_tag, bitstream header and bitstream payload into + * one bitstream buffer. 
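+ *
+ * Resulting buffer layout, per the code below:
+ *
+ *	| ac_tag (3 bytes; 10 for key frames) | header (bs_hdr_len) | payload (bs_frm_size) |
+ *
+ * For a key frame the tag additionally carries the 0x9d 0x01 0x2a start
+ * code and the 16-bit little-endian picture width and height.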
+ */ +static int vp8_enc_compose_one_frame(struct venc_vp8_inst *inst, + struct mtk_vcodec_mem *bs_buf, + unsigned int *bs_size) +{ + unsigned int not_key; + u32 bs_frm_size; + u32 bs_hdr_len; + unsigned int ac_tag_size; + u8 ac_tag[MAX_AC_TAG_SIZE]; + u32 tag; + + bs_frm_size = vp8_enc_read_reg(inst, VENC_BITSTREAM_FRAME_SIZE); + bs_hdr_len = vp8_enc_read_reg(inst, VENC_BITSTREAM_HEADER_LEN); + + /* if a frame is key frame, not_key is 0 */ + not_key = !inst->vpu_inst.is_key_frm; + tag = (bs_hdr_len << 5) | 0x10 | not_key; + ac_tag[0] = tag & 0xff; + ac_tag[1] = (tag >> 8) & 0xff; + ac_tag[2] = (tag >> 16) & 0xff; + + /* key frame */ + if (not_key == 0) { + ac_tag_size = MAX_AC_TAG_SIZE; + ac_tag[3] = 0x9d; + ac_tag[4] = 0x01; + ac_tag[5] = 0x2a; + ac_tag[6] = inst->vsi->config.pic_w; + ac_tag[7] = inst->vsi->config.pic_w >> 8; + ac_tag[8] = inst->vsi->config.pic_h; + ac_tag[9] = inst->vsi->config.pic_h >> 8; + } else { + ac_tag_size = 3; + } + + if (bs_buf->size < bs_hdr_len + bs_frm_size + ac_tag_size) { + mtk_vcodec_err(inst, "bitstream buf size is too small(%zu)", + bs_buf->size); + return -EINVAL; + } + + /* + * (1) The vp8 bitstream header and body are generated by the HW vp8 + * encoder separately at the same time. We cannot know the bitstream + * header length in advance. + * (2) From the vp8 spec, there is no stuffing byte allowed between the + * ac tag, bitstream header and bitstream body. + */ + memmove(bs_buf->va + bs_hdr_len + ac_tag_size, + bs_buf->va, bs_frm_size); + memcpy(bs_buf->va + ac_tag_size, + inst->work_bufs[VENC_VP8_VPU_WORK_BUF_BS_HEADER].va, + bs_hdr_len); + memcpy(bs_buf->va, ac_tag, ac_tag_size); + *bs_size = bs_frm_size + bs_hdr_len + ac_tag_size; + + return 0; +} + +static int vp8_enc_encode_frame(struct venc_vp8_inst *inst, + struct venc_frm_buf *frm_buf, + struct mtk_vcodec_mem *bs_buf, + unsigned int *bs_size) +{ + int ret = 0; + unsigned int irq_status; + + mtk_vcodec_debug(inst, "->frm_cnt=%d", inst->frm_cnt); + + ret = vpu_enc_encode(&inst->vpu_inst, 0, frm_buf, bs_buf, bs_size); + if (ret) + return ret; + + irq_status = vp8_enc_wait_venc_done(inst); + if (irq_status != MTK_VENC_IRQ_STATUS_FRM) { + mtk_vcodec_err(inst, "irq_status=%d failed", irq_status); + return -EIO; + } + + if (vp8_enc_compose_one_frame(inst, bs_buf, bs_size)) { + mtk_vcodec_err(inst, "vp8_enc_compose_one_frame failed"); + return -EINVAL; + } + + inst->frm_cnt++; + mtk_vcodec_debug(inst, "<-size=%d key_frm=%d", *bs_size, + inst->vpu_inst.is_key_frm); + + return ret; +} + +static int vp8_enc_init(struct mtk_vcodec_ctx *ctx, unsigned long *handle) +{ + int ret = 0; + struct venc_vp8_inst *inst; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + inst->vpu_inst.ctx = ctx; + inst->vpu_inst.dev = ctx->dev->vpu_plat_dev; + inst->vpu_inst.id = IPI_VENC_VP8; + inst->hw_base = mtk_vcodec_get_reg_addr(inst->ctx, VENC_LT_SYS); + + mtk_vcodec_debug_enter(inst); + + ret = vpu_enc_init(&inst->vpu_inst); + + inst->vsi = (struct venc_vp8_vsi *)inst->vpu_inst.vsi; + + mtk_vcodec_debug_leave(inst); + + if (ret) + kfree(inst); + else + (*handle) = (unsigned long)inst; + + return ret; +} + +static int vp8_enc_encode(unsigned long handle, + enum venc_start_opt opt, + struct venc_frm_buf *frm_buf, + struct mtk_vcodec_mem *bs_buf, + struct venc_done_result *result) +{ + int ret = 0; + struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle; + struct mtk_vcodec_ctx *ctx = inst->ctx; + + mtk_vcodec_debug_enter(inst); + + enable_irq(ctx->dev->enc_lt_irq); 
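+	/*
+	 * The VENC_LT interrupt was masked at probe time and is only
+	 * unmasked here while an encode job is in flight (it is masked
+	 * again below), which keeps the handler from running while
+	 * dev->curr_ctx is unset.
+	 */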
+ + switch (opt) { + case VENC_START_OPT_ENCODE_FRAME: + ret = vp8_enc_encode_frame(inst, frm_buf, bs_buf, + &result->bs_size); + if (ret) + goto encode_err; + result->is_key_frm = inst->vpu_inst.is_key_frm; + break; + + default: + mtk_vcodec_err(inst, "opt not support:%d", opt); + ret = -EINVAL; + break; + } + +encode_err: + + disable_irq(ctx->dev->enc_lt_irq); + mtk_vcodec_debug_leave(inst); + + return ret; +} + +static int vp8_enc_set_param(unsigned long handle, + enum venc_set_param_type type, + struct venc_enc_param *enc_prm) +{ + int ret = 0; + struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle; + + mtk_vcodec_debug(inst, "->type=%d", type); + + switch (type) { + case VENC_SET_PARAM_ENC: + inst->vsi->config.input_fourcc = enc_prm->input_yuv_fmt; + inst->vsi->config.bitrate = enc_prm->bitrate; + inst->vsi->config.pic_w = enc_prm->width; + inst->vsi->config.pic_h = enc_prm->height; + inst->vsi->config.buf_w = enc_prm->buf_width; + inst->vsi->config.buf_h = enc_prm->buf_height; + inst->vsi->config.gop_size = enc_prm->gop_size; + inst->vsi->config.framerate = enc_prm->frm_rate; + inst->vsi->config.ts_mode = inst->ts_mode; + ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm); + if (ret) + break; + if (inst->work_buf_allocated) { + vp8_enc_free_work_buf(inst); + inst->work_buf_allocated = false; + } + ret = vp8_enc_alloc_work_buf(inst); + if (ret) + break; + inst->work_buf_allocated = true; + break; + + /* + * VENC_SET_PARAM_TS_MODE must be called before VENC_SET_PARAM_ENC + */ + case VENC_SET_PARAM_TS_MODE: + inst->ts_mode = 1; + mtk_vcodec_debug(inst, "set ts_mode"); + break; + + default: + ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm); + break; + } + + mtk_vcodec_debug_leave(inst); + + return ret; +} + +static int vp8_enc_deinit(unsigned long handle) +{ + int ret = 0; + struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle; + + mtk_vcodec_debug_enter(inst); + + ret = vpu_enc_deinit(&inst->vpu_inst); + + if (inst->work_buf_allocated) + vp8_enc_free_work_buf(inst); + + mtk_vcodec_debug_leave(inst); + kfree(inst); + + return ret; +} + +static struct venc_common_if venc_vp8_if = { + vp8_enc_init, + vp8_enc_encode, + vp8_enc_set_param, + vp8_enc_deinit, +}; + +struct venc_common_if *get_vp8_enc_comm_if(void); + +struct venc_common_if *get_vp8_enc_comm_if(void) +{ + return &venc_vp8_if; +} diff --git a/drivers/media/platform/mtk-vcodec/venc_drv_base.h b/drivers/media/platform/mtk-vcodec/venc_drv_base.h new file mode 100644 index 000000000000..6308d44dedf6 --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/venc_drv_base.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: Daniel Hsiao + * Jungchang Tsao + * Tiffany Lin + * + * This program is free software; you can redistribute it and/or + * modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef _VENC_DRV_BASE_ +#define _VENC_DRV_BASE_ + +#include "mtk_vcodec_drv.h" + +#include "venc_drv_if.h" + +struct venc_common_if { + /** + * (*init)() - initialize driver + * @ctx: [in] mtk v4l2 context + * @handle: [out] driver handle + */ + int (*init)(struct mtk_vcodec_ctx *ctx, unsigned long *handle); + + /** + * (*encode)() - trigger encode + * @handle: [in] driver handle + * @opt: [in] encode option + * @frm_buf: [in] frame buffer to store input frame + * @bs_buf: [in] bitstream buffer to store output bitstream + * @result: [out] encode result + */ + int (*encode)(unsigned long handle, enum venc_start_opt opt, + struct venc_frm_buf *frm_buf, + struct mtk_vcodec_mem *bs_buf, + struct venc_done_result *result); + + /** + * (*set_param)() - set driver's parameter + * @handle: [in] driver handle + * @type: [in] parameter type + * @in: [in] buffer to store the parameter + */ + int (*set_param)(unsigned long handle, enum venc_set_param_type type, + struct venc_enc_param *in); + + /** + * (*deinit)() - deinitialize driver. + * @handle: [in] driver handle + */ + int (*deinit)(unsigned long handle); +}; + +#endif diff --git a/drivers/media/platform/mtk-vcodec/venc_drv_if.c b/drivers/media/platform/mtk-vcodec/venc_drv_if.c new file mode 100644 index 000000000000..c4c83e7189c3 --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/venc_drv_if.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: Daniel Hsiao + * Jungchang Tsao + * Tiffany Lin + * + * This program is free software; you can redistribute it and/or + * modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include + +#include "venc_drv_base.h" +#include "venc_drv_if.h" + +#include "mtk_vcodec_enc.h" +#include "mtk_vcodec_enc_pm.h" +#include "mtk_vpu.h" + +struct venc_common_if *get_h264_enc_comm_if(void); +struct venc_common_if *get_vp8_enc_comm_if(void); + +int venc_if_init(struct mtk_vcodec_ctx *ctx, unsigned int fourcc) +{ + int ret = 0; + + switch (fourcc) { + case V4L2_PIX_FMT_VP8: + ctx->enc_if = get_vp8_enc_comm_if(); + break; + case V4L2_PIX_FMT_H264: + ctx->enc_if = get_h264_enc_comm_if(); + break; + default: + return -EINVAL; + } + + mtk_venc_lock(ctx); + mtk_vcodec_enc_clock_on(&ctx->dev->pm); + ret = ctx->enc_if->init(ctx, (unsigned long *)&ctx->drv_handle); + mtk_vcodec_enc_clock_off(&ctx->dev->pm); + mtk_venc_unlock(ctx); + + return ret; +} + +int venc_if_set_param(struct mtk_vcodec_ctx *ctx, + enum venc_set_param_type type, struct venc_enc_param *in) +{ + int ret = 0; + + mtk_venc_lock(ctx); + mtk_vcodec_enc_clock_on(&ctx->dev->pm); + ret = ctx->enc_if->set_param(ctx->drv_handle, type, in); + mtk_vcodec_enc_clock_off(&ctx->dev->pm); + mtk_venc_unlock(ctx); + + return ret; +} + +int venc_if_encode(struct mtk_vcodec_ctx *ctx, + enum venc_start_opt opt, struct venc_frm_buf *frm_buf, + struct mtk_vcodec_mem *bs_buf, + struct venc_done_result *result) +{ + int ret = 0; + unsigned long flags; + + mtk_venc_lock(ctx); + + spin_lock_irqsave(&ctx->dev->irqlock, flags); + ctx->dev->curr_ctx = ctx; + spin_unlock_irqrestore(&ctx->dev->irqlock, flags); + + mtk_vcodec_enc_clock_on(&ctx->dev->pm); + ret = ctx->enc_if->encode(ctx->drv_handle, opt, frm_buf, + bs_buf, result); + mtk_vcodec_enc_clock_off(&ctx->dev->pm); + + spin_lock_irqsave(&ctx->dev->irqlock, flags); + ctx->dev->curr_ctx = NULL; + spin_unlock_irqrestore(&ctx->dev->irqlock, flags); + + mtk_venc_unlock(ctx); + return ret; +} + +int venc_if_deinit(struct mtk_vcodec_ctx *ctx) +{ + int ret = 0; + + if (ctx->drv_handle == 0) + return 0; + + mtk_venc_lock(ctx); + mtk_vcodec_enc_clock_on(&ctx->dev->pm); + ret = ctx->enc_if->deinit(ctx->drv_handle); + mtk_vcodec_enc_clock_off(&ctx->dev->pm); + mtk_venc_unlock(ctx); + + ctx->drv_handle = 0; + + return ret; +} diff --git a/drivers/media/platform/mtk-vcodec/venc_drv_if.h b/drivers/media/platform/mtk-vcodec/venc_drv_if.h new file mode 100644 index 000000000000..a6e7d32e55cb --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/venc_drv_if.h @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: Daniel Hsiao + * Jungchang Tsao + * Tiffany Lin + * + * This program is free software; you can redistribute it and/or + * modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _VENC_DRV_IF_H_ +#define _VENC_DRV_IF_H_ + +#include "mtk_vcodec_drv.h" +#include "mtk_vcodec_util.h" + +/* + * enum venc_yuv_fmt - The type of input yuv format + * (VPU related: If you change the order, you must also update the VPU codes.) 
+ * @VENC_YUV_FORMAT_I420: I420 YUV format
+ * @VENC_YUV_FORMAT_YV12: YV12 YUV format
+ * @VENC_YUV_FORMAT_NV12: NV12 YUV format
+ * @VENC_YUV_FORMAT_NV21: NV21 YUV format
+ */
+enum venc_yuv_fmt {
+	VENC_YUV_FORMAT_I420 = 3,
+	VENC_YUV_FORMAT_YV12 = 5,
+	VENC_YUV_FORMAT_NV12 = 6,
+	VENC_YUV_FORMAT_NV21 = 7,
+};
+
+/*
+ * enum venc_start_opt - encode frame option used in venc_if_encode()
+ * @VENC_START_OPT_ENCODE_SEQUENCE_HEADER: encode SPS/PPS for H264
+ * @VENC_START_OPT_ENCODE_FRAME: encode normal frame
+ */
+enum venc_start_opt {
+	VENC_START_OPT_ENCODE_SEQUENCE_HEADER,
+	VENC_START_OPT_ENCODE_FRAME,
+};
+
+/*
+ * enum venc_set_param_type - The type of set parameter used in
+ * venc_if_set_param()
+ * (VPU related: If you change the order, you must also update the VPU codes.)
+ * @VENC_SET_PARAM_ENC: set encoder parameters
+ * @VENC_SET_PARAM_FORCE_INTRA: force an intra frame
+ * @VENC_SET_PARAM_ADJUST_BITRATE: adjust bitrate (in bps)
+ * @VENC_SET_PARAM_ADJUST_FRAMERATE: set frame rate
+ * @VENC_SET_PARAM_GOP_SIZE: set IDR interval
+ * @VENC_SET_PARAM_INTRA_PERIOD: set I frame interval
+ * @VENC_SET_PARAM_SKIP_FRAME: set H264 skip one frame
+ * @VENC_SET_PARAM_PREPEND_HEADER: set H264 prepend SPS/PPS before IDR
+ * @VENC_SET_PARAM_TS_MODE: set VP8 temporal scalability mode
+ */
+enum venc_set_param_type {
+	VENC_SET_PARAM_ENC,
+	VENC_SET_PARAM_FORCE_INTRA,
+	VENC_SET_PARAM_ADJUST_BITRATE,
+	VENC_SET_PARAM_ADJUST_FRAMERATE,
+	VENC_SET_PARAM_GOP_SIZE,
+	VENC_SET_PARAM_INTRA_PERIOD,
+	VENC_SET_PARAM_SKIP_FRAME,
+	VENC_SET_PARAM_PREPEND_HEADER,
+	VENC_SET_PARAM_TS_MODE,
+};
+
+/*
+ * struct venc_enc_param - encoder settings for VENC_SET_PARAM_ENC used in
+ * venc_if_set_param()
+ * @input_yuv_fmt: input yuv format
+ * @h264_profile: V4L2 defined H.264 profile
+ * @h264_level: V4L2 defined H.264 level
+ * @width: image width
+ * @height: image height
+ * @buf_width: buffer width
+ * @buf_height: buffer height
+ * @frm_rate: frame rate in fps
+ * @intra_period: intra frame period
+ * @bitrate: target bitrate in bps
+ * @gop_size: group of picture size
+ */
+struct venc_enc_param {
+	enum venc_yuv_fmt input_yuv_fmt;
+	unsigned int h264_profile;
+	unsigned int h264_level;
+	unsigned int width;
+	unsigned int height;
+	unsigned int buf_width;
+	unsigned int buf_height;
+	unsigned int frm_rate;
+	unsigned int intra_period;
+	unsigned int bitrate;
+	unsigned int gop_size;
+};
+
+/*
+ * struct venc_frm_buf - frame buffer information used in venc_if_encode()
+ * @fb_addr: plane frame buffer addresses
+ */
+struct venc_frm_buf {
+	struct mtk_vcodec_mem fb_addr[MTK_VCODEC_MAX_PLANES];
+};
+
+/*
+ * struct venc_done_result - return information from venc_if_encode()
+ * @bs_size: output bitstream size
+ * @is_key_frm: output is key frame or not
+ */
+struct venc_done_result {
+	unsigned int bs_size;
+	bool is_key_frm;
+};
+
+/*
+ * venc_if_init - Create the driver handle
+ * @ctx: device context
+ * @fourcc: encoder input format
+ * Return: 0 if the handle is created successfully, otherwise an error code.
+ */
+int venc_if_init(struct mtk_vcodec_ctx *ctx, unsigned int fourcc);
+
+/*
+ * venc_if_deinit - Release the driver handle
+ * @ctx: device context
+ * Return: 0 if the handle is released successfully, otherwise an error code.
+ */
+int venc_if_deinit(struct mtk_vcodec_ctx *ctx);
+
+/*
+ * venc_if_set_param - Set parameter to driver
+ * @ctx: device context
+ * @type: parameter type
+ * @in: input parameter
+ * Return: 0 if the parameter is set successfully, otherwise an error code.
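+ *
+ * A minimal caller sketch (the values are illustrative, not mandated by
+ * this driver):
+ *
+ *	struct venc_enc_param prm = {
+ *		.input_yuv_fmt = VENC_YUV_FORMAT_NV12,
+ *		.width = 1280,
+ *		.height = 720,
+ *		.buf_width = 1280,
+ *		.buf_height = 720,
+ *		.frm_rate = 30,
+ *		.bitrate = 4000000,
+ *		.gop_size = 30,
+ *	};
+ *	ret = venc_if_set_param(ctx, VENC_SET_PARAM_ENC, &prm);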
+ */
+int venc_if_set_param(struct mtk_vcodec_ctx *ctx,
+		      enum venc_set_param_type type,
+		      struct venc_enc_param *in);
+
+/*
+ * venc_if_encode - Encode one frame
+ * @ctx: device context
+ * @opt: encode frame option
+ * @frm_buf: input frame buffer information
+ * @bs_buf: output bitstream buffer information
+ * @result: encode result
+ * Return: 0 if the frame is encoded successfully, otherwise an error code.
+ */
+int venc_if_encode(struct mtk_vcodec_ctx *ctx,
+		   enum venc_start_opt opt,
+		   struct venc_frm_buf *frm_buf,
+		   struct mtk_vcodec_mem *bs_buf,
+		   struct venc_done_result *result);
+
+#endif /* _VENC_DRV_IF_H_ */
diff --git a/drivers/media/platform/mtk-vcodec/venc_ipi_msg.h b/drivers/media/platform/mtk-vcodec/venc_ipi_msg.h
new file mode 100644
index 000000000000..4c869cb6fbf7
--- /dev/null
+++ b/drivers/media/platform/mtk-vcodec/venc_ipi_msg.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2016 MediaTek Inc.
+ * Author: Jungchang Tsao
+ *	   Daniel Hsiao
+ *	   Tiffany Lin
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _VENC_IPI_MSG_H_
+#define _VENC_IPI_MSG_H_
+
+#define AP_IPIMSG_VENC_BASE 0xC000
+#define VPU_IPIMSG_VENC_BASE 0xD000
+
+/**
+ * enum venc_ipi_msg_id - message id between AP and VPU
+ * (ipi stands for inter-processor interrupt)
+ * @AP_IPIMSG_ENC_XXX: AP to VPU cmd message id
+ * @VPU_IPIMSG_ENC_XXX_DONE: VPU ack AP cmd message id
+ */
+enum venc_ipi_msg_id {
+	AP_IPIMSG_ENC_INIT = AP_IPIMSG_VENC_BASE,
+	AP_IPIMSG_ENC_SET_PARAM,
+	AP_IPIMSG_ENC_ENCODE,
+	AP_IPIMSG_ENC_DEINIT,
+
+	VPU_IPIMSG_ENC_INIT_DONE = VPU_IPIMSG_VENC_BASE,
+	VPU_IPIMSG_ENC_SET_PARAM_DONE,
+	VPU_IPIMSG_ENC_ENCODE_DONE,
+	VPU_IPIMSG_ENC_DEINIT_DONE,
+};
+
+/**
+ * struct venc_ap_ipi_msg_init - AP to VPU init cmd structure
+ * @msg_id:	message id (AP_IPIMSG_XXX_ENC_INIT)
+ * @reserved:	reserved for future use. The vpu runs in 32-bit mode. Without
+ *		this reserved field, if the kernel runs in 64-bit mode,
this struct size + * will be different between kernel and vpu + * @venc_inst: AP encoder instance + * (struct venc_vp8_inst/venc_h264_inst *) + */ +struct venc_ap_ipi_msg_init { + uint32_t msg_id; + uint32_t reserved; + uint64_t venc_inst; +}; + +/** + * struct venc_ap_ipi_msg_set_param - AP to VPU set_param cmd structure + * @msg_id: message id (AP_IPIMSG_XXX_ENC_SET_PARAM) + * @vpu_inst_addr: VPU encoder instance addr + * (struct venc_vp8_vsi/venc_h264_vsi *) + * @param_id: parameter id (venc_set_param_type) + * @data_item: number of items in the data array + * @data[8]: data array to store the set parameters + */ +struct venc_ap_ipi_msg_set_param { + uint32_t msg_id; + uint32_t vpu_inst_addr; + uint32_t param_id; + uint32_t data_item; + uint32_t data[8]; +}; + +/** + * struct venc_ap_ipi_msg_enc - AP to VPU enc cmd structure + * @msg_id: message id (AP_IPIMSG_XXX_ENC_ENCODE) + * @vpu_inst_addr: VPU encoder instance addr + * (struct venc_vp8_vsi/venc_h264_vsi *) + * @bs_mode: bitstream mode for h264 + * (H264_BS_MODE_SPS/H264_BS_MODE_PPS/H264_BS_MODE_FRAME) + * @input_addr: pointer to input image buffer plane + * @bs_addr: pointer to output bit stream buffer + * @bs_size: bit stream buffer size + */ +struct venc_ap_ipi_msg_enc { + uint32_t msg_id; + uint32_t vpu_inst_addr; + uint32_t bs_mode; + uint32_t input_addr[3]; + uint32_t bs_addr; + uint32_t bs_size; +}; + +/** + * struct venc_ap_ipi_msg_deinit - AP to VPU deinit cmd structure + * @msg_id: message id (AP_IPIMSG_XXX_ENC_DEINIT) + * @vpu_inst_addr: VPU encoder instance addr + * (struct venc_vp8_vsi/venc_h264_vsi *) + */ +struct venc_ap_ipi_msg_deinit { + uint32_t msg_id; + uint32_t vpu_inst_addr; +}; + +/** + * enum venc_ipi_msg_status - VPU ack AP cmd status + */ +enum venc_ipi_msg_status { + VENC_IPI_MSG_STATUS_OK, + VENC_IPI_MSG_STATUS_FAIL, +}; + +/** + * struct venc_vpu_ipi_msg_common - VPU ack AP cmd common structure + * @msg_id: message id (VPU_IPIMSG_XXX_DONE) + * @status: cmd status (venc_ipi_msg_status) + * @venc_inst: AP encoder instance (struct venc_vp8_inst/venc_h264_inst *) + */ +struct venc_vpu_ipi_msg_common { + uint32_t msg_id; + uint32_t status; + uint64_t venc_inst; +}; + +/** + * struct venc_vpu_ipi_msg_init - VPU ack AP init cmd structure + * @msg_id: message id (VPU_IPIMSG_XXX_ENC_SET_PARAM_DONE) + * @status: cmd status (venc_ipi_msg_status) + * @venc_inst: AP encoder instance (struct venc_vp8_inst/venc_h264_inst *) + * @vpu_inst_addr: VPU encoder instance addr + * (struct venc_vp8_vsi/venc_h264_vsi *) + * @reserved: reserved for future use. vpu is running in 32bit. Without + * this reserved field, if kernel run in 64bit. 
this struct size + * will be different between kernel and vpu + */ +struct venc_vpu_ipi_msg_init { + uint32_t msg_id; + uint32_t status; + uint64_t venc_inst; + uint32_t vpu_inst_addr; + uint32_t reserved; +}; + +/** + * struct venc_vpu_ipi_msg_set_param - VPU ack AP set_param cmd structure + * @msg_id: message id (VPU_IPIMSG_XXX_ENC_SET_PARAM_DONE) + * @status: cmd status (venc_ipi_msg_status) + * @venc_inst: AP encoder instance (struct venc_vp8_inst/venc_h264_inst *) + * @param_id: parameter id (venc_set_param_type) + * @data_item: number of items in the data array + * @data[6]: data array to store the return result + */ +struct venc_vpu_ipi_msg_set_param { + uint32_t msg_id; + uint32_t status; + uint64_t venc_inst; + uint32_t param_id; + uint32_t data_item; + uint32_t data[6]; +}; + +/** + * enum venc_ipi_msg_enc_state - Type of encode state + * VEN_IPI_MSG_ENC_STATE_FRAME: one frame being encoded + * VEN_IPI_MSG_ENC_STATE_PART: bit stream buffer full + * VEN_IPI_MSG_ENC_STATE_SKIP: encoded skip frame + * VEN_IPI_MSG_ENC_STATE_ERROR: encounter error + */ +enum venc_ipi_msg_enc_state { + VEN_IPI_MSG_ENC_STATE_FRAME, + VEN_IPI_MSG_ENC_STATE_PART, + VEN_IPI_MSG_ENC_STATE_SKIP, + VEN_IPI_MSG_ENC_STATE_ERROR, +}; + +/** + * struct venc_vpu_ipi_msg_enc - VPU ack AP enc cmd structure + * @msg_id: message id (VPU_IPIMSG_XXX_ENC_ENCODE_DONE) + * @status: cmd status (venc_ipi_msg_status) + * @venc_inst: AP encoder instance (struct venc_vp8_inst/venc_h264_inst *) + * @state: encode state (venc_ipi_msg_enc_state) + * @is_key_frm: whether the encoded frame is key frame + * @bs_size: encoded bitstream size + * @reserved: reserved for future use. vpu is running in 32bit. Without + * this reserved field, if kernel run in 64bit. this struct size + * will be different between kernel and vpu + */ +struct venc_vpu_ipi_msg_enc { + uint32_t msg_id; + uint32_t status; + uint64_t venc_inst; + uint32_t state; + uint32_t is_key_frm; + uint32_t bs_size; + uint32_t reserved; +}; + +/** + * struct venc_vpu_ipi_msg_deinit - VPU ack AP deinit cmd structure + * @msg_id: message id (VPU_IPIMSG_XXX_ENC_DEINIT_DONE) + * @status: cmd status (venc_ipi_msg_status) + * @venc_inst: AP encoder instance (struct venc_vp8_inst/venc_h264_inst *) + */ +struct venc_vpu_ipi_msg_deinit { + uint32_t msg_id; + uint32_t status; + uint64_t venc_inst; +}; + +#endif /* _VENC_IPI_MSG_H_ */ diff --git a/drivers/media/platform/mtk-vcodec/venc_vpu_if.c b/drivers/media/platform/mtk-vcodec/venc_vpu_if.c new file mode 100644 index 000000000000..a01c7599b510 --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/venc_vpu_if.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PoChun Lin + * + * This program is free software; you can redistribute it and/or + * modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "mtk_vpu.h" +#include "venc_ipi_msg.h" +#include "venc_vpu_if.h" + +static void handle_enc_init_msg(struct venc_vpu_inst *vpu, void *data) +{ + struct venc_vpu_ipi_msg_init *msg = data; + + vpu->inst_addr = msg->vpu_inst_addr; + vpu->vsi = vpu_mapping_dm_addr(vpu->dev, msg->vpu_inst_addr); +} + +static void handle_enc_encode_msg(struct venc_vpu_inst *vpu, void *data) +{ + struct venc_vpu_ipi_msg_enc *msg = data; + + vpu->state = msg->state; + vpu->bs_size = msg->bs_size; + vpu->is_key_frm = msg->is_key_frm; +} + +static void vpu_enc_ipi_handler(void *data, unsigned int len, void *priv) +{ + struct venc_vpu_ipi_msg_common *msg = data; + struct venc_vpu_inst *vpu = + (struct venc_vpu_inst *)(unsigned long)msg->venc_inst; + + mtk_vcodec_debug(vpu, "msg_id %x inst %p status %d", + msg->msg_id, vpu, msg->status); + + switch (msg->msg_id) { + case VPU_IPIMSG_ENC_INIT_DONE: + handle_enc_init_msg(vpu, data); + break; + case VPU_IPIMSG_ENC_SET_PARAM_DONE: + break; + case VPU_IPIMSG_ENC_ENCODE_DONE: + handle_enc_encode_msg(vpu, data); + break; + case VPU_IPIMSG_ENC_DEINIT_DONE: + break; + default: + mtk_vcodec_err(vpu, "unknown msg id %x", msg->msg_id); + break; + } + + vpu->signaled = 1; + vpu->failure = (msg->status != VENC_IPI_MSG_STATUS_OK); + + mtk_vcodec_debug_leave(vpu); +} + +static int vpu_enc_send_msg(struct venc_vpu_inst *vpu, void *msg, + int len) +{ + int status; + + mtk_vcodec_debug_enter(vpu); + + if (!vpu->dev) { + mtk_vcodec_err(vpu, "inst dev is NULL"); + return -EINVAL; + } + + status = vpu_ipi_send(vpu->dev, vpu->id, msg, len); + if (status) { + uint32_t msg_id = *(uint32_t *)msg; + + mtk_vcodec_err(vpu, "vpu_ipi_send msg_id %x len %d fail %d", + msg_id, len, status); + return -EINVAL; + } + if (vpu->failure) + return -EINVAL; + + mtk_vcodec_debug_leave(vpu); + + return 0; +} + +int vpu_enc_init(struct venc_vpu_inst *vpu) +{ + int status; + struct venc_ap_ipi_msg_init out; + + mtk_vcodec_debug_enter(vpu); + + init_waitqueue_head(&vpu->wq_hd); + vpu->signaled = 0; + vpu->failure = 0; + + status = vpu_ipi_register(vpu->dev, vpu->id, vpu_enc_ipi_handler, + NULL, NULL); + if (status) { + mtk_vcodec_err(vpu, "vpu_ipi_register fail %d", status); + return -EINVAL; + } + + memset(&out, 0, sizeof(out)); + out.msg_id = AP_IPIMSG_ENC_INIT; + out.venc_inst = (unsigned long)vpu; + if (vpu_enc_send_msg(vpu, &out, sizeof(out))) { + mtk_vcodec_err(vpu, "AP_IPIMSG_ENC_INIT fail"); + return -EINVAL; + } + + mtk_vcodec_debug_leave(vpu); + + return 0; +} + +int vpu_enc_set_param(struct venc_vpu_inst *vpu, + enum venc_set_param_type id, + struct venc_enc_param *enc_param) +{ + struct venc_ap_ipi_msg_set_param out; + + mtk_vcodec_debug(vpu, "id %d ->", id); + + memset(&out, 0, sizeof(out)); + out.msg_id = AP_IPIMSG_ENC_SET_PARAM; + out.vpu_inst_addr = vpu->inst_addr; + out.param_id = id; + switch (id) { + case VENC_SET_PARAM_ENC: + out.data_item = 0; + break; + case VENC_SET_PARAM_FORCE_INTRA: + out.data_item = 0; + break; + case VENC_SET_PARAM_ADJUST_BITRATE: + out.data_item = 1; + out.data[0] = enc_param->bitrate; + break; + case VENC_SET_PARAM_ADJUST_FRAMERATE: + out.data_item = 1; + out.data[0] = enc_param->frm_rate; + break; + case VENC_SET_PARAM_GOP_SIZE: + out.data_item = 1; + out.data[0] = enc_param->gop_size; + break; + case VENC_SET_PARAM_INTRA_PERIOD: + out.data_item = 1; + out.data[0] = enc_param->intra_period; + break; + case VENC_SET_PARAM_SKIP_FRAME: + out.data_item = 0; + break; + default: + mtk_vcodec_err(vpu, "id %d not supported", id); + return -EINVAL; + } + if 
(vpu_enc_send_msg(vpu, &out, sizeof(out))) { + mtk_vcodec_err(vpu, + "AP_IPIMSG_ENC_SET_PARAM %d fail", id); + return -EINVAL; + } + + mtk_vcodec_debug(vpu, "id %d <-", id); + + return 0; +} + +int vpu_enc_encode(struct venc_vpu_inst *vpu, unsigned int bs_mode, + struct venc_frm_buf *frm_buf, + struct mtk_vcodec_mem *bs_buf, + unsigned int *bs_size) +{ + struct venc_ap_ipi_msg_enc out; + + mtk_vcodec_debug(vpu, "bs_mode %d ->", bs_mode); + + memset(&out, 0, sizeof(out)); + out.msg_id = AP_IPIMSG_ENC_ENCODE; + out.vpu_inst_addr = vpu->inst_addr; + out.bs_mode = bs_mode; + if (frm_buf) { + if ((frm_buf->fb_addr[0].dma_addr % 16 == 0) && + (frm_buf->fb_addr[1].dma_addr % 16 == 0) && + (frm_buf->fb_addr[2].dma_addr % 16 == 0)) { + out.input_addr[0] = frm_buf->fb_addr[0].dma_addr; + out.input_addr[1] = frm_buf->fb_addr[1].dma_addr; + out.input_addr[2] = frm_buf->fb_addr[2].dma_addr; + } else { + mtk_vcodec_err(vpu, "dma_addr not align to 16"); + return -EINVAL; + } + } + if (bs_buf) { + out.bs_addr = bs_buf->dma_addr; + out.bs_size = bs_buf->size; + } + if (vpu_enc_send_msg(vpu, &out, sizeof(out))) { + mtk_vcodec_err(vpu, "AP_IPIMSG_ENC_ENCODE %d fail", + bs_mode); + return -EINVAL; + } + + mtk_vcodec_debug(vpu, "bs_mode %d state %d size %d key_frm %d <-", + bs_mode, vpu->state, vpu->bs_size, vpu->is_key_frm); + + return 0; +} + +int vpu_enc_deinit(struct venc_vpu_inst *vpu) +{ + struct venc_ap_ipi_msg_deinit out; + + mtk_vcodec_debug_enter(vpu); + + memset(&out, 0, sizeof(out)); + out.msg_id = AP_IPIMSG_ENC_DEINIT; + out.vpu_inst_addr = vpu->inst_addr; + if (vpu_enc_send_msg(vpu, &out, sizeof(out))) { + mtk_vcodec_err(vpu, "AP_IPIMSG_ENC_DEINIT fail"); + return -EINVAL; + } + + mtk_vcodec_debug_leave(vpu); + + return 0; +} diff --git a/drivers/media/platform/mtk-vcodec/venc_vpu_if.h b/drivers/media/platform/mtk-vcodec/venc_vpu_if.h new file mode 100644 index 000000000000..215d1e01362e --- /dev/null +++ b/drivers/media/platform/mtk-vcodec/venc_vpu_if.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PoChun Lin + * + * This program is free software; you can redistribute it and/or + * modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */
+
+#ifndef _VENC_VPU_IF_H_
+#define _VENC_VPU_IF_H_
+
+#include "mtk_vpu.h"
+#include "venc_drv_if.h"
+
+/*
+ * struct venc_vpu_inst - encoder VPU driver instance
+ * @wq_hd: wait queue used to wait for the vpu interrupt after a vpu cmd is
+ *	   triggered
+ * @signaled: flag used for checking vpu interrupt done
+ * @failure: flag showing whether the vpu cmd succeeded
+ * @state: enum venc_ipi_msg_enc_state
+ * @bs_size: bitstream size for skip frame case usage
+ * @is_key_frm: key frame flag
+ * @inst_addr: VPU instance addr
+ * @vsi: driver structure allocated by the VPU side and shared with the AP
+ *	 side for control and info sharing
+ * @id: the id of inter-processor interrupt
+ * @ctx: context for v4l2 layer integration
+ * @dev: device for v4l2 layer integration
+ */
+struct venc_vpu_inst {
+	wait_queue_head_t wq_hd;
+	int signaled;
+	int failure;
+	int state;
+	int bs_size;
+	int is_key_frm;
+	unsigned int inst_addr;
+	void *vsi;
+	enum ipi_id id;
+	struct mtk_vcodec_ctx *ctx;
+	struct platform_device *dev;
+};
+
+int vpu_enc_init(struct venc_vpu_inst *vpu);
+int vpu_enc_set_param(struct venc_vpu_inst *vpu,
+		      enum venc_set_param_type id,
+		      struct venc_enc_param *param);
+int vpu_enc_encode(struct venc_vpu_inst *vpu, unsigned int bs_mode,
+		   struct venc_frm_buf *frm_buf,
+		   struct mtk_vcodec_mem *bs_buf,
+		   unsigned int *bs_size);
+int vpu_enc_deinit(struct venc_vpu_inst *vpu);
+
+#endif
diff --git a/drivers/media/platform/mtk-vpu/Makefile b/drivers/media/platform/mtk-vpu/Makefile
new file mode 100644
index 000000000000..58cc1b4bc9f2
--- /dev/null
+++ b/drivers/media/platform/mtk-vpu/Makefile
@@ -0,0 +1,3 @@
+mtk-vpu-y += mtk_vpu.o
+
+obj-$(CONFIG_VIDEO_MEDIATEK_VPU) += mtk-vpu.o
diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.c b/drivers/media/platform/mtk-vpu/mtk_vpu.c
new file mode 100644
index 000000000000..c9bf58c97878
--- /dev/null
+++ b/drivers/media/platform/mtk-vpu/mtk_vpu.c
@@ -0,0 +1,946 @@
+/*
+* Copyright (c) 2016 MediaTek Inc.
+* Author: Andrew-CT Chen
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License version 2 as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*/
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "mtk_vpu.h"
+
+/**
+ * VPU (video processor unit) is a tiny processor controlling video hardware
+ * related to video codec, scaling and color format converting.
+ * The VPU interfaces with other blocks via shared memory and interrupts.
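+ *
+ * A rough sketch of one IPI round trip as implemented below (AP is the
+ * host kernel side):
+ *
+ *	AP:  vpu_ipi_send() copies the message into send_buf in DTCM and
+ *	     writes 1 to HOST_TO_VPU
+ *	VPU: handles the command, fills recv_buf and raises VPU_IPC_INT
+ *	     through VPU_TO_HOST
+ *	AP:  vpu_irq_handler() dispatches recv_buf to the handler registered
+ *	     with vpu_ipi_register(), then clears VPU_TO_HOST so the VPU may
+ *	     send the next interrupt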
+ **/ + +#define INIT_TIMEOUT_MS 2000U +#define IPI_TIMEOUT_MS 2000U +#define VPU_FW_VER_LEN 16 + +/* maximum program/data TCM (Tightly-Coupled Memory) size */ +#define VPU_PTCM_SIZE (96 * SZ_1K) +#define VPU_DTCM_SIZE (32 * SZ_1K) +/* the offset to get data tcm address */ +#define VPU_DTCM_OFFSET 0x18000UL +/* daynamic allocated maximum extended memory size */ +#define VPU_EXT_P_SIZE SZ_1M +#define VPU_EXT_D_SIZE SZ_4M +/* maximum binary firmware size */ +#define VPU_P_FW_SIZE (VPU_PTCM_SIZE + VPU_EXT_P_SIZE) +#define VPU_D_FW_SIZE (VPU_DTCM_SIZE + VPU_EXT_D_SIZE) +/* the size of share buffer between Host and VPU */ +#define SHARE_BUF_SIZE 48 + +/* binary firmware name */ +#define VPU_P_FW "vpu_p.bin" +#define VPU_D_FW "vpu_d.bin" + +#define VPU_RESET 0x0 +#define VPU_TCM_CFG 0x0008 +#define VPU_PMEM_EXT0_ADDR 0x000C +#define VPU_PMEM_EXT1_ADDR 0x0010 +#define VPU_TO_HOST 0x001C +#define VPU_DMEM_EXT0_ADDR 0x0014 +#define VPU_DMEM_EXT1_ADDR 0x0018 +#define HOST_TO_VPU 0x0024 +#define VPU_PC_REG 0x0060 +#define VPU_WDT_REG 0x0084 + +/* vpu inter-processor communication interrupt */ +#define VPU_IPC_INT BIT(8) + +/** + * enum vpu_fw_type - VPU firmware type + * + * @P_FW: program firmware + * @D_FW: data firmware + * + */ +enum vpu_fw_type { + P_FW, + D_FW, +}; + +/** + * struct vpu_mem - VPU extended program/data memory information + * + * @va: the kernel virtual memory address of VPU extended memory + * @pa: the physical memory address of VPU extended memory + * + */ +struct vpu_mem { + void *va; + dma_addr_t pa; +}; + +/** + * struct vpu_regs - VPU TCM and configuration registers + * + * @tcm: the register for VPU Tightly-Coupled Memory + * @cfg: the register for VPU configuration + * @irq: the irq number for VPU interrupt + */ +struct vpu_regs { + void __iomem *tcm; + void __iomem *cfg; + int irq; +}; + +/** + * struct vpu_wdt_handler - VPU watchdog reset handler + * + * @reset_func: reset handler + * @priv: private data + */ +struct vpu_wdt_handler { + void (*reset_func)(void *); + void *priv; +}; + +/** + * struct vpu_wdt - VPU watchdog workqueue + * + * @handler: VPU watchdog reset handler + * @ws: workstruct for VPU watchdog + * @wq: workqueue for VPU watchdog + */ +struct vpu_wdt { + struct vpu_wdt_handler handler[VPU_RST_MAX]; + struct work_struct ws; + struct workqueue_struct *wq; +}; + +/** + * struct vpu_run - VPU initialization status + * + * @signaled: the signal of vpu initialization completed + * @fw_ver: VPU firmware version + * @enc_capability: encoder capability which is not used for now and + * the value is reserved for future use + * @wq: wait queue for VPU initialization status + */ +struct vpu_run { + u32 signaled; + char fw_ver[VPU_FW_VER_LEN]; + unsigned int enc_capability; + wait_queue_head_t wq; +}; + +/** + * struct vpu_ipi_desc - VPU IPI descriptor + * + * @handler: IPI handler + * @name: the name of IPI handler + * @priv: the private data of IPI handler + */ +struct vpu_ipi_desc { + ipi_handler_t handler; + const char *name; + void *priv; +}; + +/** + * struct share_obj - DTCM (Data Tightly-Coupled Memory) buffer shared with + * AP and VPU + * + * @id: IPI id + * @len: share buffer length + * @share_buf: share buffer data + */ +struct share_obj { + s32 id; + u32 len; + unsigned char share_buf[SHARE_BUF_SIZE]; +}; + +/** + * struct mtk_vpu - vpu driver data + * @extmem: VPU extended memory information + * @reg: VPU TCM and configuration registers + * @run: VPU initialization status + * @ipi_desc: VPU IPI descriptor + * @recv_buf: VPU DTCM share buffer for 
receiving. The
+ *			receive buffer is only accessed in interrupt context.
+ * @send_buf:		VPU DTCM share buffer for sending
+ * @dev:		VPU struct device
+ * @clk:		VPU clock on/off
+ * @fw_loaded:		indicate VPU firmware loaded
+ * @enable_4GB:		VPU 4GB mode on/off
+ * @vpu_mutex:		protect mtk_vpu (except recv_buf) and ensure that
+ *			only one client uses the VPU service at a time. For
+ *			example, suppose a client is using VPU to decode VP8.
+ *			If another client wants to encode VP8,
+ *			it has to wait until VP8 decode completes.
+ * @wdt_refcnt:		WDT reference count to make sure the watchdog can be
+ *			disabled if no other client is using VPU service
+ * @ack_wq:		The wait queue for each codec and mdp. When sleeping
+ *			processes wake up, they will check the condition
+ *			"ipi_id_ack" to run the corresponding action or
+ *			go back to sleep.
+ * @ipi_id_ack:		The ACKs for registered IPI function sending
+ *			interrupt to VPU
+ *
+ */
+struct mtk_vpu {
+	struct vpu_mem extmem[2];
+	struct vpu_regs reg;
+	struct vpu_run run;
+	struct vpu_wdt wdt;
+	struct vpu_ipi_desc ipi_desc[IPI_MAX];
+	struct share_obj *recv_buf;
+	struct share_obj *send_buf;
+	struct device *dev;
+	struct clk *clk;
+	bool fw_loaded;
+	bool enable_4GB;
+	struct mutex vpu_mutex; /* for protecting vpu data structure */
+	u32 wdt_refcnt;
+	wait_queue_head_t ack_wq;
+	bool ipi_id_ack[IPI_MAX];
+};
+
+static inline void vpu_cfg_writel(struct mtk_vpu *vpu, u32 val, u32 offset)
+{
+	writel(val, vpu->reg.cfg + offset);
+}
+
+static inline u32 vpu_cfg_readl(struct mtk_vpu *vpu, u32 offset)
+{
+	return readl(vpu->reg.cfg + offset);
+}
+
+static inline bool vpu_running(struct mtk_vpu *vpu)
+{
+	return vpu_cfg_readl(vpu, VPU_RESET) & BIT(0);
+}
+
+static void vpu_clock_disable(struct mtk_vpu *vpu)
+{
+	/* Disable VPU watchdog */
+	mutex_lock(&vpu->vpu_mutex);
+	if (!--vpu->wdt_refcnt)
+		vpu_cfg_writel(vpu,
+			       vpu_cfg_readl(vpu, VPU_WDT_REG) & ~(1L << 31),
+			       VPU_WDT_REG);
+	mutex_unlock(&vpu->vpu_mutex);
+
+	clk_disable(vpu->clk);
+}
+
+static int vpu_clock_enable(struct mtk_vpu *vpu)
+{
+	int ret;
+
+	ret = clk_enable(vpu->clk);
+	if (ret)
+		return ret;
+	/* Enable VPU watchdog */
+	mutex_lock(&vpu->vpu_mutex);
+	if (!vpu->wdt_refcnt++)
+		vpu_cfg_writel(vpu,
+			       vpu_cfg_readl(vpu, VPU_WDT_REG) | (1L << 31),
+			       VPU_WDT_REG);
+	mutex_unlock(&vpu->vpu_mutex);
+
+	return ret;
+}
+
+int vpu_ipi_register(struct platform_device *pdev,
+		     enum ipi_id id, ipi_handler_t handler,
+		     const char *name, void *priv)
+{
+	struct mtk_vpu *vpu = platform_get_drvdata(pdev);
+	struct vpu_ipi_desc *ipi_desc;
+
+	if (!vpu) {
+		dev_err(&pdev->dev, "vpu device is not ready\n");
+		return -EPROBE_DEFER;
+	}
+
+	if (id >= 0 && id < IPI_MAX && handler) {
+		ipi_desc = vpu->ipi_desc;
+		ipi_desc[id].name = name;
+		ipi_desc[id].handler = handler;
+		ipi_desc[id].priv = priv;
+		return 0;
+	}
+
+	dev_err(&pdev->dev, "register vpu ipi id %d with invalid arguments\n",
+		id);
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(vpu_ipi_register);
+
+int vpu_ipi_send(struct platform_device *pdev,
+		 enum ipi_id id, void *buf,
+		 unsigned int len)
+{
+	struct mtk_vpu *vpu = platform_get_drvdata(pdev);
+	struct share_obj *send_obj = vpu->send_buf;
+	unsigned long timeout;
+	int ret = 0;
+
+	if (id <= IPI_VPU_INIT || id >= IPI_MAX ||
+	    len > sizeof(send_obj->share_buf) || !buf) {
+		dev_err(vpu->dev, "failed to send ipi message\n");
+		return -EINVAL;
+	}
+
+	ret = vpu_clock_enable(vpu);
+	if (ret) {
+		dev_err(vpu->dev, "failed to enable vpu clock\n");
+		return ret;
+	}
+	if (!vpu_running(vpu)) {
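+		/*
+		 * The VPU services IPIs only after vpu_load_firmware() has
+		 * booted it (VPU_RESET bit 0 set); bail out early otherwise.
+		 */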
+		dev_err(vpu->dev, "vpu_ipi_send: VPU is not running\n");
+		ret = -EINVAL;
+		goto clock_disable;
+	}
+
+	mutex_lock(&vpu->vpu_mutex);
+
+	/* Wait until VPU receives the last command */
+	timeout = jiffies + msecs_to_jiffies(IPI_TIMEOUT_MS);
+	do {
+		if (time_after(jiffies, timeout)) {
+			dev_err(vpu->dev, "vpu_ipi_send: IPI timeout!\n");
+			ret = -EIO;
+			goto mut_unlock;
+		}
+	} while (vpu_cfg_readl(vpu, HOST_TO_VPU));
+
+	memcpy((void *)send_obj->share_buf, buf, len);
+	send_obj->len = len;
+	send_obj->id = id;
+
+	vpu->ipi_id_ack[id] = false;
+	/* send the command to VPU */
+	vpu_cfg_writel(vpu, 0x1, HOST_TO_VPU);
+
+	mutex_unlock(&vpu->vpu_mutex);
+
+	/* wait for VPU's ACK */
+	timeout = msecs_to_jiffies(IPI_TIMEOUT_MS);
+	ret = wait_event_timeout(vpu->ack_wq, vpu->ipi_id_ack[id], timeout);
+	vpu->ipi_id_ack[id] = false;
+	if (ret == 0) {
+		dev_err(vpu->dev, "vpu ipi %d ack timed out!", id);
+		ret = -EIO;
+		goto clock_disable;
+	}
+	vpu_clock_disable(vpu);
+
+	return 0;
+
+mut_unlock:
+	mutex_unlock(&vpu->vpu_mutex);
+clock_disable:
+	vpu_clock_disable(vpu);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vpu_ipi_send);
+
+static void vpu_wdt_reset_func(struct work_struct *ws)
+{
+	struct vpu_wdt *wdt = container_of(ws, struct vpu_wdt, ws);
+	struct mtk_vpu *vpu = container_of(wdt, struct mtk_vpu, wdt);
+	struct vpu_wdt_handler *handler = wdt->handler;
+	int index, ret;
+
+	dev_info(vpu->dev, "vpu reset\n");
+	ret = vpu_clock_enable(vpu);
+	if (ret) {
+		dev_err(vpu->dev, "[VPU] wdt enable clock failed %d\n", ret);
+		return;
+	}
+	mutex_lock(&vpu->vpu_mutex);
+	vpu_cfg_writel(vpu, 0x0, VPU_RESET);
+	vpu->fw_loaded = false;
+	mutex_unlock(&vpu->vpu_mutex);
+	vpu_clock_disable(vpu);
+
+	for (index = 0; index < VPU_RST_MAX; index++) {
+		if (handler[index].reset_func) {
+			handler[index].reset_func(handler[index].priv);
+			dev_dbg(vpu->dev, "wdt handler func %d\n", index);
+		}
+	}
+}
+
+int vpu_wdt_reg_handler(struct platform_device *pdev,
+			void wdt_reset(void *),
+			void *priv, enum rst_id id)
+{
+	struct mtk_vpu *vpu = platform_get_drvdata(pdev);
+	struct vpu_wdt_handler *handler;
+
+	if (!vpu) {
+		dev_err(&pdev->dev, "vpu device is not ready\n");
+		return -EPROBE_DEFER;
+	}
+
+	handler = vpu->wdt.handler;
+
+	if (id >= 0 && id < VPU_RST_MAX && wdt_reset) {
+		dev_dbg(vpu->dev, "wdt register id %d\n", id);
+		mutex_lock(&vpu->vpu_mutex);
+		handler[id].reset_func = wdt_reset;
+		handler[id].priv = priv;
+		mutex_unlock(&vpu->vpu_mutex);
+		return 0;
+	}
+
+	dev_err(vpu->dev, "register vpu wdt handler failed\n");
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(vpu_wdt_reg_handler);
+
+unsigned int vpu_get_venc_hw_capa(struct platform_device *pdev)
+{
+	struct mtk_vpu *vpu = platform_get_drvdata(pdev);
+
+	return vpu->run.enc_capability;
+}
+EXPORT_SYMBOL_GPL(vpu_get_venc_hw_capa);
+
+void *vpu_mapping_dm_addr(struct platform_device *pdev,
+			  u32 dtcm_dmem_addr)
+{
+	struct mtk_vpu *vpu = platform_get_drvdata(pdev);
+
+	if (!dtcm_dmem_addr ||
+	    (dtcm_dmem_addr > (VPU_DTCM_SIZE + VPU_EXT_D_SIZE))) {
+		dev_err(vpu->dev, "invalid virtual data memory address\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (dtcm_dmem_addr < VPU_DTCM_SIZE)
+		return (__force void *)(dtcm_dmem_addr + vpu->reg.tcm +
+					VPU_DTCM_OFFSET);
+
+	return vpu->extmem[D_FW].va + (dtcm_dmem_addr - VPU_DTCM_SIZE);
+}
+EXPORT_SYMBOL_GPL(vpu_mapping_dm_addr);
+
+struct platform_device *vpu_get_plat_device(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *vpu_node;
+	struct platform_device *vpu_pdev;
+
+	vpu_node =
of_parse_phandle(dev->of_node, "mediatek,vpu", 0); + if (!vpu_node) { + dev_err(dev, "can't get vpu node\n"); + return NULL; + } + + vpu_pdev = of_find_device_by_node(vpu_node); + if (WARN_ON(!vpu_pdev)) { + dev_err(dev, "vpu pdev failed\n"); + of_node_put(vpu_node); + return NULL; + } + + return vpu_pdev; +} +EXPORT_SYMBOL_GPL(vpu_get_plat_device); + +/* load vpu program/data memory */ +static int load_requested_vpu(struct mtk_vpu *vpu, + const struct firmware *vpu_fw, + u8 fw_type) +{ + size_t tcm_size = fw_type ? VPU_DTCM_SIZE : VPU_PTCM_SIZE; + size_t fw_size = fw_type ? VPU_D_FW_SIZE : VPU_P_FW_SIZE; + char *fw_name = fw_type ? VPU_D_FW : VPU_P_FW; + size_t dl_size = 0; + size_t extra_fw_size = 0; + void *dest; + int ret; + + ret = request_firmware(&vpu_fw, fw_name, vpu->dev); + if (ret < 0) { + dev_err(vpu->dev, "Failed to load %s, %d\n", fw_name, ret); + return ret; + } + dl_size = vpu_fw->size; + if (dl_size > fw_size) { + dev_err(vpu->dev, "fw %s size %zu is abnormal\n", fw_name, + dl_size); + release_firmware(vpu_fw); + return -EFBIG; + } + dev_dbg(vpu->dev, "Downloaded fw %s size: %zu.\n", + fw_name, + dl_size); + /* reset VPU */ + vpu_cfg_writel(vpu, 0x0, VPU_RESET); + + /* handle extended firmware size */ + if (dl_size > tcm_size) { + dev_dbg(vpu->dev, "fw size %zu > limited fw size %zu\n", + dl_size, tcm_size); + extra_fw_size = dl_size - tcm_size; + dev_dbg(vpu->dev, "extra_fw_size %zu\n", extra_fw_size); + dl_size = tcm_size; + } + dest = (__force void *)vpu->reg.tcm; + if (fw_type == D_FW) + dest += VPU_DTCM_OFFSET; + memcpy(dest, vpu_fw->data, dl_size); + /* download to extended memory if need */ + if (extra_fw_size > 0) { + dest = vpu->extmem[fw_type].va; + dev_dbg(vpu->dev, "download extended memory type %x\n", + fw_type); + memcpy(dest, vpu_fw->data + tcm_size, extra_fw_size); + } + + release_firmware(vpu_fw); + + return 0; +} + +int vpu_load_firmware(struct platform_device *pdev) +{ + struct mtk_vpu *vpu = platform_get_drvdata(pdev); + struct device *dev = &pdev->dev; + struct vpu_run *run = &vpu->run; + const struct firmware *vpu_fw = NULL; + int ret; + + if (!pdev) { + dev_err(dev, "VPU platform device is invalid\n"); + return -EINVAL; + } + + mutex_lock(&vpu->vpu_mutex); + if (vpu->fw_loaded) { + mutex_unlock(&vpu->vpu_mutex); + return 0; + } + mutex_unlock(&vpu->vpu_mutex); + + ret = vpu_clock_enable(vpu); + if (ret) { + dev_err(dev, "enable clock failed %d\n", ret); + return ret; + } + + mutex_lock(&vpu->vpu_mutex); + + run->signaled = false; + dev_dbg(vpu->dev, "firmware request\n"); + /* Downloading program firmware to device*/ + ret = load_requested_vpu(vpu, vpu_fw, P_FW); + if (ret < 0) { + dev_err(dev, "Failed to request %s, %d\n", VPU_P_FW, ret); + goto OUT_LOAD_FW; + } + + /* Downloading data firmware to device */ + ret = load_requested_vpu(vpu, vpu_fw, D_FW); + if (ret < 0) { + dev_err(dev, "Failed to request %s, %d\n", VPU_D_FW, ret); + goto OUT_LOAD_FW; + } + + vpu->fw_loaded = true; + /* boot up vpu */ + vpu_cfg_writel(vpu, 0x1, VPU_RESET); + + ret = wait_event_interruptible_timeout(run->wq, + run->signaled, + msecs_to_jiffies(INIT_TIMEOUT_MS) + ); + if (ret == 0) { + ret = -ETIME; + dev_err(dev, "wait vpu initialization timout!\n"); + goto OUT_LOAD_FW; + } else if (-ERESTARTSYS == ret) { + dev_err(dev, "wait vpu interrupted by a signal!\n"); + goto OUT_LOAD_FW; + } + + ret = 0; + dev_info(dev, "vpu is ready. 
Fw version %s\n", run->fw_ver); + +OUT_LOAD_FW: + mutex_unlock(&vpu->vpu_mutex); + vpu_clock_disable(vpu); + + return ret; +} +EXPORT_SYMBOL_GPL(vpu_load_firmware); + +static void vpu_init_ipi_handler(void *data, unsigned int len, void *priv) +{ + struct mtk_vpu *vpu = (struct mtk_vpu *)priv; + struct vpu_run *run = (struct vpu_run *)data; + + vpu->run.signaled = run->signaled; + strncpy(vpu->run.fw_ver, run->fw_ver, VPU_FW_VER_LEN); + vpu->run.enc_capability = run->enc_capability; + wake_up_interruptible(&vpu->run.wq); +} + +#ifdef CONFIG_DEBUG_FS +static ssize_t vpu_debug_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buf[256]; + unsigned int len; + unsigned int running, pc, vpu_to_host, host_to_vpu, wdt; + int ret; + struct device *dev = file->private_data; + struct mtk_vpu *vpu = dev_get_drvdata(dev); + + ret = vpu_clock_enable(vpu); + if (ret) { + dev_err(vpu->dev, "[VPU] enable clock failed %d\n", ret); + return 0; + } + + /* vpu register status */ + running = vpu_running(vpu); + pc = vpu_cfg_readl(vpu, VPU_PC_REG); + wdt = vpu_cfg_readl(vpu, VPU_WDT_REG); + host_to_vpu = vpu_cfg_readl(vpu, HOST_TO_VPU); + vpu_to_host = vpu_cfg_readl(vpu, VPU_TO_HOST); + vpu_clock_disable(vpu); + + if (running) { + len = snprintf(buf, sizeof(buf), "VPU is running\n\n" + "FW Version: %s\n" + "PC: 0x%x\n" + "WDT: 0x%x\n" + "Host to VPU: 0x%x\n" + "VPU to Host: 0x%x\n", + vpu->run.fw_ver, pc, wdt, + host_to_vpu, vpu_to_host); + } else { + len = snprintf(buf, sizeof(buf), "VPU not running\n"); + } + + return simple_read_from_buffer(user_buf, count, ppos, buf, len); +} + +static const struct file_operations vpu_debug_fops = { + .open = simple_open, + .read = vpu_debug_read, +}; +#endif /* CONFIG_DEBUG_FS */ + +static void vpu_free_ext_mem(struct mtk_vpu *vpu, u8 fw_type) +{ + struct device *dev = vpu->dev; + size_t fw_ext_size = fw_type ? VPU_EXT_D_SIZE : VPU_EXT_P_SIZE; + + dma_free_coherent(dev, fw_ext_size, vpu->extmem[fw_type].va, + vpu->extmem[fw_type].pa); +} + +static int vpu_alloc_ext_mem(struct mtk_vpu *vpu, u32 fw_type) +{ + struct device *dev = vpu->dev; + size_t fw_ext_size = fw_type ? VPU_EXT_D_SIZE : VPU_EXT_P_SIZE; + u32 vpu_ext_mem0 = fw_type ? VPU_DMEM_EXT0_ADDR : VPU_PMEM_EXT0_ADDR; + u32 vpu_ext_mem1 = fw_type ? VPU_DMEM_EXT1_ADDR : VPU_PMEM_EXT1_ADDR; + u32 offset_4gb = vpu->enable_4GB ? 0x40000000 : 0; + + vpu->extmem[fw_type].va = dma_alloc_coherent(dev, + fw_ext_size, + &vpu->extmem[fw_type].pa, + GFP_KERNEL); + if (!vpu->extmem[fw_type].va) { + dev_err(dev, "Failed to allocate the extended program memory\n"); + return PTR_ERR(vpu->extmem[fw_type].va); + } + + /* Disable extend0. Enable extend1 */ + vpu_cfg_writel(vpu, 0x1, vpu_ext_mem0); + vpu_cfg_writel(vpu, (vpu->extmem[fw_type].pa & 0xFFFFF000) + offset_4gb, + vpu_ext_mem1); + + dev_info(dev, "%s extend memory phy=0x%llx virt=0x%p\n", + fw_type ? 
"Data" : "Program", + (unsigned long long)vpu->extmem[fw_type].pa, + vpu->extmem[fw_type].va); + + return 0; +} + +static void vpu_ipi_handler(struct mtk_vpu *vpu) +{ + struct share_obj *rcv_obj = vpu->recv_buf; + struct vpu_ipi_desc *ipi_desc = vpu->ipi_desc; + + if (rcv_obj->id < IPI_MAX && ipi_desc[rcv_obj->id].handler) { + ipi_desc[rcv_obj->id].handler(rcv_obj->share_buf, + rcv_obj->len, + ipi_desc[rcv_obj->id].priv); + if (rcv_obj->id > IPI_VPU_INIT) { + vpu->ipi_id_ack[rcv_obj->id] = true; + wake_up(&vpu->ack_wq); + } + } else { + dev_err(vpu->dev, "No such ipi id = %d\n", rcv_obj->id); + } +} + +static int vpu_ipi_init(struct mtk_vpu *vpu) +{ + /* Disable VPU to host interrupt */ + vpu_cfg_writel(vpu, 0x0, VPU_TO_HOST); + + /* shared buffer initialization */ + vpu->recv_buf = (__force struct share_obj *)(vpu->reg.tcm + + VPU_DTCM_OFFSET); + vpu->send_buf = vpu->recv_buf + 1; + memset(vpu->recv_buf, 0, sizeof(struct share_obj)); + memset(vpu->send_buf, 0, sizeof(struct share_obj)); + + return 0; +} + +static irqreturn_t vpu_irq_handler(int irq, void *priv) +{ + struct mtk_vpu *vpu = priv; + u32 vpu_to_host; + int ret; + + /* + * Clock should have been enabled already. + * Enable again in case vpu_ipi_send times out + * and has disabled the clock. + */ + ret = clk_enable(vpu->clk); + if (ret) { + dev_err(vpu->dev, "[VPU] enable clock failed %d\n", ret); + return IRQ_NONE; + } + vpu_to_host = vpu_cfg_readl(vpu, VPU_TO_HOST); + if (vpu_to_host & VPU_IPC_INT) { + vpu_ipi_handler(vpu); + } else { + dev_err(vpu->dev, "vpu watchdog timeout! 0x%x", vpu_to_host); + queue_work(vpu->wdt.wq, &vpu->wdt.ws); + } + + /* VPU won't send another interrupt until we set VPU_TO_HOST to 0. */ + vpu_cfg_writel(vpu, 0x0, VPU_TO_HOST); + clk_disable(vpu->clk); + + return IRQ_HANDLED; +} + +#ifdef CONFIG_DEBUG_FS +static struct dentry *vpu_debugfs; +#endif +static int mtk_vpu_probe(struct platform_device *pdev) +{ + struct mtk_vpu *vpu; + struct device *dev; + struct resource *res; + int ret = 0; + + dev_dbg(&pdev->dev, "initialization\n"); + + dev = &pdev->dev; + vpu = devm_kzalloc(dev, sizeof(*vpu), GFP_KERNEL); + if (!vpu) + return -ENOMEM; + + vpu->dev = &pdev->dev; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "tcm"); + vpu->reg.tcm = devm_ioremap_resource(dev, res); + if (IS_ERR((__force void *)vpu->reg.tcm)) + return PTR_ERR((__force void *)vpu->reg.tcm); + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cfg_reg"); + vpu->reg.cfg = devm_ioremap_resource(dev, res); + if (IS_ERR((__force void *)vpu->reg.cfg)) + return PTR_ERR((__force void *)vpu->reg.cfg); + + /* Get VPU clock */ + vpu->clk = devm_clk_get(dev, "main"); + if (IS_ERR(vpu->clk)) { + dev_err(dev, "get vpu clock failed\n"); + return PTR_ERR(vpu->clk); + } + + platform_set_drvdata(pdev, vpu); + + ret = clk_prepare(vpu->clk); + if (ret) { + dev_err(dev, "prepare vpu clock failed\n"); + return ret; + } + + /* VPU watchdog */ + vpu->wdt.wq = create_singlethread_workqueue("vpu_wdt"); + if (!vpu->wdt.wq) { + dev_err(dev, "initialize wdt workqueue failed\n"); + return -ENOMEM; + } + INIT_WORK(&vpu->wdt.ws, vpu_wdt_reset_func); + mutex_init(&vpu->vpu_mutex); + + ret = vpu_clock_enable(vpu); + if (ret) { + dev_err(dev, "enable vpu clock failed\n"); + goto workqueue_destroy; + } + + dev_dbg(dev, "vpu ipi init\n"); + ret = vpu_ipi_init(vpu); + if (ret) { + dev_err(dev, "Failed to init ipi\n"); + goto disable_vpu_clk; + } + + /* register vpu initialization IPI */ + ret = vpu_ipi_register(pdev, IPI_VPU_INIT, vpu_init_ipi_handler, 
+ "vpu_init", vpu); + if (ret) { + dev_err(dev, "Failed to register IPI_VPU_INIT\n"); + goto vpu_mutex_destroy; + } + +#ifdef CONFIG_DEBUG_FS + vpu_debugfs = debugfs_create_file("mtk_vpu", S_IRUGO, NULL, (void *)dev, + &vpu_debug_fops); + if (!vpu_debugfs) { + ret = -ENOMEM; + goto cleanup_ipi; + } +#endif + + /* Set PTCM to 96K and DTCM to 32K */ + vpu_cfg_writel(vpu, 0x2, VPU_TCM_CFG); + + vpu->enable_4GB = !!(totalram_pages > (SZ_2G >> PAGE_SHIFT)); + dev_info(dev, "4GB mode %u\n", vpu->enable_4GB); + + if (vpu->enable_4GB) { + ret = of_reserved_mem_device_init(dev); + if (ret) + dev_info(dev, "init reserved memory failed\n"); + /* continue to use dynamic allocation if failed */ + } + + ret = vpu_alloc_ext_mem(vpu, D_FW); + if (ret) { + dev_err(dev, "Allocate DM failed\n"); + goto remove_debugfs; + } + + ret = vpu_alloc_ext_mem(vpu, P_FW); + if (ret) { + dev_err(dev, "Allocate PM failed\n"); + goto free_d_mem; + } + + init_waitqueue_head(&vpu->run.wq); + init_waitqueue_head(&vpu->ack_wq); + + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res) { + dev_err(dev, "get IRQ resource failed.\n"); + ret = -ENXIO; + goto free_p_mem; + } + vpu->reg.irq = platform_get_irq(pdev, 0); + ret = devm_request_irq(dev, vpu->reg.irq, vpu_irq_handler, 0, + pdev->name, vpu); + if (ret) { + dev_err(dev, "failed to request irq\n"); + goto free_p_mem; + } + + vpu_clock_disable(vpu); + dev_dbg(dev, "initialization completed\n"); + + return 0; + +free_p_mem: + vpu_free_ext_mem(vpu, P_FW); +free_d_mem: + vpu_free_ext_mem(vpu, D_FW); +remove_debugfs: + of_reserved_mem_device_release(dev); +#ifdef CONFIG_DEBUG_FS + debugfs_remove(vpu_debugfs); +cleanup_ipi: +#endif + memset(vpu->ipi_desc, 0, sizeof(struct vpu_ipi_desc) * IPI_MAX); +vpu_mutex_destroy: + mutex_destroy(&vpu->vpu_mutex); +disable_vpu_clk: + vpu_clock_disable(vpu); +workqueue_destroy: + destroy_workqueue(vpu->wdt.wq); + + return ret; +} + +static const struct of_device_id mtk_vpu_match[] = { + { + .compatible = "mediatek,mt8173-vpu", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, mtk_vpu_match); + +static int mtk_vpu_remove(struct platform_device *pdev) +{ + struct mtk_vpu *vpu = platform_get_drvdata(pdev); + +#ifdef CONFIG_DEBUG_FS + debugfs_remove(vpu_debugfs); +#endif + if (vpu->wdt.wq) { + flush_workqueue(vpu->wdt.wq); + destroy_workqueue(vpu->wdt.wq); + } + vpu_free_ext_mem(vpu, P_FW); + vpu_free_ext_mem(vpu, D_FW); + mutex_destroy(&vpu->vpu_mutex); + clk_unprepare(vpu->clk); + + return 0; +} + +static struct platform_driver mtk_vpu_driver = { + .probe = mtk_vpu_probe, + .remove = mtk_vpu_remove, + .driver = { + .name = "mtk_vpu", + .of_match_table = mtk_vpu_match, + }, +}; + +module_platform_driver(mtk_vpu_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Mediatek Video Prosessor Unit driver"); diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.h b/drivers/media/platform/mtk-vpu/mtk_vpu.h new file mode 100644 index 000000000000..5ab37f04bdfd --- /dev/null +++ b/drivers/media/platform/mtk-vpu/mtk_vpu.h @@ -0,0 +1,162 @@ +/* +* Copyright (c) 2016 MediaTek Inc. +* Author: Andrew-CT Chen +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License version 2 as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+*/
+
+#ifndef _MTK_VPU_H
+#define _MTK_VPU_H
+
+#include
+
+/**
+ * VPU (video processor unit) is a tiny processor controlling video hardware
+ * related to video codec, scaling and color format converting.
+ * The VPU interfaces with other blocks via shared memory and interrupts.
+ **/
+
+typedef void (*ipi_handler_t) (void *data,
+			       unsigned int len,
+			       void *priv);
+
+/**
+ * enum ipi_id - the id of inter-processor interrupt
+ *
+ * @IPI_VPU_INIT:	 The interrupt from vpu notifies the kernel that
+ *			 VPU initialization has completed.
+ *			 IPI_VPU_INIT is sent from VPU when firmware is
+ *			 loaded. AP doesn't need to send IPI_VPU_INIT
+ *			 command to VPU.
+ *			 For the other IPIs below, AP should send the request
+ *			 to VPU to trigger the interrupt.
+ * @IPI_VENC_H264:	 The interrupt from vpu notifies the kernel to
+ *			 handle an H264 video encoder job, and vice versa.
+ * @IPI_VENC_VP8:	 The interrupt from vpu notifies the kernel to
+ *			 handle a VP8 video encoder job, and vice versa.
+ * @IPI_MAX:		 The maximum IPI number
+ */
+
+enum ipi_id {
+	IPI_VPU_INIT = 0,
+	IPI_VENC_H264,
+	IPI_VENC_VP8,
+	IPI_MAX,
+};
+
+/**
+ * enum rst_id - reset id to register reset function for VPU watchdog timeout
+ *
+ * @VPU_RST_ENC: encoder reset id
+ * @VPU_RST_MAX: maximum reset id
+ */
+enum rst_id {
+	VPU_RST_ENC,
+	VPU_RST_MAX,
+};
+
+/**
+ * vpu_ipi_register - register an ipi function
+ *
+ * @pdev:	VPU platform device
+ * @id:		IPI ID
+ * @handler:	IPI handler
+ * @name:	IPI name
+ * @priv:	private data for IPI handler
+ *
+ * Register an ipi function to receive ipi interrupt from VPU.
+ *
+ * Return: 0 if the ipi handler is registered successfully,
+ * otherwise an error code.
+ */
+int vpu_ipi_register(struct platform_device *pdev, enum ipi_id id,
+		     ipi_handler_t handler, const char *name, void *priv);
+
+/**
+ * vpu_ipi_send - send data from AP to vpu.
+ *
+ * @pdev:	VPU platform device
+ * @id:		IPI ID
+ * @buf:	the data buffer
+ * @len:	the data buffer length
+ *
+ * This function is thread-safe. When this function returns,
+ * VPU has received the data and starts the processing.
+ * When the processing completes, the IPI handler registered
+ * by vpu_ipi_register will be called in interrupt context.
+ *
+ * Return: 0 if the data is sent successfully, otherwise an error code.
+ **/
+int vpu_ipi_send(struct platform_device *pdev,
+		 enum ipi_id id, void *buf,
+		 unsigned int len);
+
+/**
+ * vpu_get_plat_device - get VPU's platform device
+ *
+ * @pdev:	the platform device of the module requesting VPU platform
+ *		device for using VPU API.
+ *
+ * Return: the VPU's platform device,
+ * or NULL on failure.
+ **/
+struct platform_device *vpu_get_plat_device(struct platform_device *pdev);
+
+/**
+ * vpu_wdt_reg_handler - register a VPU watchdog handler
+ *
+ * @pdev:		VPU platform device
+ * @vpu_wdt_reset_func:	the callback reset function
+ * @priv:		the private data for the reset function
+ * @id:			reset id
+ *
+ * Register a handler that performs its own recovery tasks when the vpu is
+ * reset by the watchdog.
+ *
+ * Return: 0 if the handler is added successfully,
+ * otherwise an error code.
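+ *
+ * Hypothetical client sketch (the names are illustrative):
+ *
+ *	static void my_enc_reset(void *priv);
+ *	...
+ *	vpu_wdt_reg_handler(vpu_pdev, my_enc_reset, my_dev, VPU_RST_ENC);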
+ * + **/ +int vpu_wdt_reg_handler(struct platform_device *pdev, + void vpu_wdt_reset_func(void *), + void *priv, enum rst_id id); +/** + * vpu_get_venc_hw_capa - get video encoder hardware capability + * + * @pdev: VPU platform device + * + * Return: video encoder hardware capability + **/ +unsigned int vpu_get_venc_hw_capa(struct platform_device *pdev); + +/** + * vpu_load_firmware - download VPU firmware and boot it + * + * @pdev: VPU platform device + * + * Return: Return 0 if downloading firmware successfully, + * otherwise it is failed + **/ +int vpu_load_firmware(struct platform_device *pdev); + +/** + * vpu_mapping_dm_addr - Mapping DTCM/DMEM to kernel virtual address + * + * @pdev: VPU platform device + * @dmem_addr: VPU's data memory address + * + * Mapping the VPU's DTCM (Data Tightly-Coupled Memory) / + * DMEM (Data Extended Memory) memory address to + * kernel virtual address. + * + * Return: Return ERR_PTR(-EINVAL) if mapping failed, + * otherwise the mapped kernel virtual address + **/ +void *vpu_mapping_dm_addr(struct platform_device *pdev, + u32 dtcm_dmem_addr); +#endif /* _MTK_VPU_H */ diff --git a/drivers/media/platform/mx2_emmaprp.c b/drivers/media/platform/mx2_emmaprp.c index 3c4012d42d69..c639406fe72e 100644 --- a/drivers/media/platform/mx2_emmaprp.c +++ b/drivers/media/platform/mx2_emmaprp.c @@ -211,7 +211,6 @@ struct emmaprp_dev { struct clk *clk_emma_ahb, *clk_emma_ipg; struct v4l2_m2m_dev *m2m_dev; - struct vb2_alloc_ctx *alloc_ctx; }; struct emmaprp_ctx { @@ -690,7 +689,7 @@ static const struct v4l2_ioctl_ops emmaprp_ioctl_ops = { */ static int emmaprp_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, - unsigned int sizes[], void *alloc_ctxs[]) + unsigned int sizes[], struct device *alloc_devs[]) { struct emmaprp_ctx *ctx = vb2_get_drv_priv(vq); struct emmaprp_q_data *q_data; @@ -710,8 +709,6 @@ static int emmaprp_queue_setup(struct vb2_queue *vq, *nbuffers = count; sizes[0] = size; - alloc_ctxs[0] = ctx->dev->alloc_ctx; - dprintk(ctx->dev, "get %d buffer(s) of size %d each.\n", count, size); return 0; @@ -765,6 +762,7 @@ static int queue_init(void *priv, struct vb2_queue *src_vq, src_vq->ops = &emmaprp_qops; src_vq->mem_ops = &vb2_dma_contig_memops; src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + src_vq->dev = ctx->dev->v4l2_dev.dev; ret = vb2_queue_init(src_vq); if (ret) @@ -777,6 +775,7 @@ static int queue_init(void *priv, struct vb2_queue *src_vq, dst_vq->ops = &emmaprp_qops; dst_vq->mem_ops = &vb2_dma_contig_memops; dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + dst_vq->dev = ctx->dev->v4l2_dev.dev; return vb2_queue_init(dst_vq); } @@ -948,18 +947,11 @@ static int emmaprp_probe(struct platform_device *pdev) if (ret) goto rel_vdev; - pcdev->alloc_ctx = vb2_dma_contig_init_ctx(&pdev->dev); - if (IS_ERR(pcdev->alloc_ctx)) { - v4l2_err(&pcdev->v4l2_dev, "Failed to alloc vb2 context\n"); - ret = PTR_ERR(pcdev->alloc_ctx); - goto rel_vdev; - } - pcdev->m2m_dev = v4l2_m2m_init(&m2m_ops); if (IS_ERR(pcdev->m2m_dev)) { v4l2_err(&pcdev->v4l2_dev, "Failed to init mem2mem device\n"); ret = PTR_ERR(pcdev->m2m_dev); - goto rel_ctx; + goto rel_vdev; } ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0); @@ -973,8 +965,6 @@ static int emmaprp_probe(struct platform_device *pdev) rel_m2m: v4l2_m2m_release(pcdev->m2m_dev); -rel_ctx: - vb2_dma_contig_cleanup_ctx(pcdev->alloc_ctx); rel_vdev: video_device_release(vfd); unreg_dev: @@ -993,7 +983,6 @@ static int emmaprp_remove(struct platform_device *pdev) 
video_unregister_device(pcdev->vfd); v4l2_m2m_release(pcdev->m2m_dev); - vb2_dma_contig_cleanup_ctx(pcdev->alloc_ctx); v4l2_device_unregister(&pcdev->v4l2_dev); mutex_destroy(&pcdev->dev_mutex); diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index 70c28d19ea04..4afc999c0780 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c @@ -1318,71 +1318,16 @@ s_crop_err: return ret; } -static int vidioc_queryctrl(struct file *file, void *fh, - struct v4l2_queryctrl *ctrl) +static int omap_vout_s_ctrl(struct v4l2_ctrl *ctrl) { + struct omap_vout_device *vout = + container_of(ctrl->handler, struct omap_vout_device, ctrl_handler); int ret = 0; switch (ctrl->id) { - case V4L2_CID_ROTATE: - ret = v4l2_ctrl_query_fill(ctrl, 0, 270, 90, 0); - break; - case V4L2_CID_BG_COLOR: - ret = v4l2_ctrl_query_fill(ctrl, 0, 0xFFFFFF, 1, 0); - break; - case V4L2_CID_VFLIP: - ret = v4l2_ctrl_query_fill(ctrl, 0, 1, 1, 0); - break; - default: - ctrl->name[0] = '\0'; - ret = -EINVAL; - } - return ret; -} - -static int vidioc_g_ctrl(struct file *file, void *fh, struct v4l2_control *ctrl) -{ - int ret = 0; - struct omap_vout_device *vout = fh; - - switch (ctrl->id) { - case V4L2_CID_ROTATE: - ctrl->value = vout->control[0].value; - break; - case V4L2_CID_BG_COLOR: - { - struct omap_overlay_manager_info info; - struct omap_overlay *ovl; - - ovl = vout->vid_info.overlays[0]; - if (!ovl->manager || !ovl->manager->get_manager_info) { - ret = -EINVAL; - break; - } - - ovl->manager->get_manager_info(ovl->manager, &info); - ctrl->value = info.default_color; - break; - } - case V4L2_CID_VFLIP: - ctrl->value = vout->control[2].value; - break; - default: - ret = -EINVAL; - } - return ret; -} - -static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *a) -{ - int ret = 0; - struct omap_vout_device *vout = fh; - - switch (a->id) { - case V4L2_CID_ROTATE: - { + case V4L2_CID_ROTATE: { struct omapvideo_info *ovid; - int rotation = a->value; + int rotation = ctrl->val; ovid = &vout->vid_info; @@ -1405,15 +1350,13 @@ static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *a) ret = -EINVAL; break; } - - vout->control[0].value = rotation; mutex_unlock(&vout->lock); break; } case V4L2_CID_BG_COLOR: { struct omap_overlay *ovl; - unsigned int color = a->value; + unsigned int color = ctrl->val; struct omap_overlay_manager_info info; ovl = vout->vid_info.overlays[0]; @@ -1432,15 +1375,13 @@ static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *a) ret = -EINVAL; break; } - - vout->control[1].value = color; mutex_unlock(&vout->lock); break; } case V4L2_CID_VFLIP: { struct omapvideo_info *ovid; - unsigned int mirror = a->value; + unsigned int mirror = ctrl->val; ovid = &vout->vid_info; @@ -1457,16 +1398,19 @@ static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *a) break; } vout->mirror = mirror; - vout->control[2].value = mirror; mutex_unlock(&vout->lock); break; } default: - ret = -EINVAL; + return -EINVAL; } return ret; } +static const struct v4l2_ctrl_ops omap_vout_ctrl_ops = { + .s_ctrl = omap_vout_s_ctrl, +}; + static int vidioc_reqbufs(struct file *file, void *fh, struct v4l2_requestbuffers *req) { @@ -1831,11 +1775,8 @@ static const struct v4l2_ioctl_ops vout_ioctl_ops = { .vidioc_g_fmt_vid_out = vidioc_g_fmt_vid_out, .vidioc_try_fmt_vid_out = vidioc_try_fmt_vid_out, .vidioc_s_fmt_vid_out = vidioc_s_fmt_vid_out, - .vidioc_queryctrl = vidioc_queryctrl, - .vidioc_g_ctrl = 
vidioc_g_ctrl, .vidioc_s_fbuf = vidioc_s_fbuf, .vidioc_g_fbuf = vidioc_g_fbuf, - .vidioc_s_ctrl = vidioc_s_ctrl, .vidioc_try_fmt_vid_out_overlay = vidioc_try_fmt_vid_overlay, .vidioc_s_fmt_vid_out_overlay = vidioc_s_fmt_vid_overlay, .vidioc_g_fmt_vid_out_overlay = vidioc_g_fmt_vid_overlay, @@ -1865,9 +1806,9 @@ static int __init omap_vout_setup_video_data(struct omap_vout_device *vout) { struct video_device *vfd; struct v4l2_pix_format *pix; - struct v4l2_control *control; struct omap_overlay *ovl = vout->vid_info.overlays[0]; struct omap_dss_device *display = ovl->get_device(ovl); + struct v4l2_ctrl_handler *hdl; /* set the default pix */ pix = &vout->pix; @@ -1896,29 +1837,32 @@ static int __init omap_vout_setup_video_data(struct omap_vout_device *vout) omap_vout_new_format(pix, &vout->fbuf, &vout->crop, &vout->win); - /*Initialize the control variables for - rotation, flipping and background color. */ - control = vout->control; - control[0].id = V4L2_CID_ROTATE; - control[0].value = 0; + hdl = &vout->ctrl_handler; + v4l2_ctrl_handler_init(hdl, 3); + v4l2_ctrl_new_std(hdl, &omap_vout_ctrl_ops, + V4L2_CID_ROTATE, 0, 270, 90, 0); + v4l2_ctrl_new_std(hdl, &omap_vout_ctrl_ops, + V4L2_CID_BG_COLOR, 0, 0xffffff, 1, 0); + v4l2_ctrl_new_std(hdl, &omap_vout_ctrl_ops, + V4L2_CID_VFLIP, 0, 1, 1, 0); + if (hdl->error) + return hdl->error; + vout->rotation = 0; vout->mirror = false; - vout->control[2].id = V4L2_CID_HFLIP; - vout->control[2].value = 0; if (vout->vid_info.rotation_type == VOUT_ROT_VRFB) vout->vrfb_bpp = 2; - control[1].id = V4L2_CID_BG_COLOR; - control[1].value = 0; - /* initialize the video_device struct */ vfd = vout->vfd = video_device_alloc(); if (!vfd) { printk(KERN_ERR VOUT_NAME ": could not allocate" " video device struct\n"); + v4l2_ctrl_handler_free(hdl); return -ENOMEM; } + vfd->ctrl_handler = hdl; vfd->release = video_device_release; vfd->ioctl_ops = &vout_ioctl_ops; @@ -2092,6 +2036,7 @@ static void omap_vout_cleanup_device(struct omap_vout_device *vout) video_unregister_device(vfd); } } + v4l2_ctrl_handler_free(&vout->ctrl_handler); if (ovid->rotation_type == VOUT_ROT_VRFB) { omap_vout_release_vrfb(vout); /* Free the VRFB buffer if allocated diff --git a/drivers/media/platform/omap/omap_voutdef.h b/drivers/media/platform/omap/omap_voutdef.h index 9ccfe1f475a4..49de1475e473 100644 --- a/drivers/media/platform/omap/omap_voutdef.h +++ b/drivers/media/platform/omap/omap_voutdef.h @@ -11,6 +11,7 @@ #ifndef OMAP_VOUTDEF_H #define OMAP_VOUTDEF_H +#include #include